Analyze AFM tip solvation

This notebook demonstrates deposition of an SDS adsorption layer on a non-spherical AFM tip model.

Initialization

IPython magic

In [13]:
%load_ext autoreload
%autoreload 2
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
In [14]:
%aimport
Modules to reload:
all-except-skipped

Modules to skip:

Imports

In [15]:
import ase.io # here used for reading pdb files
from ase.visualize import view
from ase.visualize.plot import plot_atoms # has nasty offset issues
from cycler import cycler # here used for cycling through colors in plots
import datetime
import fabric # for pythonic ssh connections
from fireworks import LaunchPad, Firework, Tracker, Workflow 
from fireworks import FileTransferTask, PyTask, ScriptTask

# FireWorks functionality 
from fireworks import Firework, LaunchPad, ScriptTask, Workflow
from fireworks.user_objects.firetasks.templatewriter_task import TemplateWriterTask
from fireworks.user_objects.firetasks.filepad_tasks import AddFilesTask, GetFilesTask, GetFilesByQueryTask
from imteksimfw.fireworks.user_objects.firetasks.cmd_tasks import CmdTask
from fireworks.utilities.filepad import FilePad # direct FilePad access, similar to the familiar LaunchPad

from collections.abc import Iterable
import glob
import gc # manually clean up memory with gc.collect()
import gromacs # GromacsWrapper, here used for evoking gmc commands, reading and writing .ndx files
# from io import StringIO, TextIOWrapper
import io
from IPython.display import display, Image #, Video # display image files within notebook
from ipywidgets import Video  # display video within notebook
import itertools # for products of iterables
import json # generic serialization of lists and dicts
import jinja2 # here used for filling packmol input script template
import jinja2.meta # for gathering variables in a jinja2 template
import logging 
import matplotlib.pyplot as plt
import MDAnalysis as mda # here used for reading and analyzing gromacs trajectories
import MDAnalysis.analysis.rdf as mda_rdf
import MDAnalysis.analysis.rms as mda_rms
from mpl_toolkits.mplot3d import Axes3D # here used for 3d point cloud scatter plot
import miniball # finds minimum bounding sphere of a point set
import nglview
import numpy as np
import os, os.path
import pandas as pd
import panedr # reads GROMACS edr into pandas df, requires pandas and pbr
import parmed as pmd # has quite a few advantages over ASE when it comes to parsing pdb
from pprint import pprint
import pymongo # for sorting in queries
import scipy.constants as sc
import subprocess # used for evoking external packmol
import sys
import tempfile
import yaml

GromacsWrapper might need a file ~/.gromacswrapper.cfg with content

[Gromacs]
tools = gmx gmx_d 
# gmx_mpi_d gmx_mpi_d

# name of the logfile that is written to the current directory
logfilename = gromacs.log

# loglevels (see Python's logging module for details)
#   ERROR   only fatal errors
#   WARN    only warnings
#   INFO    interesting messages
#   DEBUG   everything

# console messages written to screen
loglevel_console = INFO

# file messages written to logfilename
loglevel_file = DEBUG

in order to know the GROMACS executables it is allowed to use. Otherwise, calls to gmx_mpi or gmx_mpi_d without MPI wrapper might lead to MPI warnings in output that cause GromacsWrapper to fail.

Logging

In [16]:
# Notebook-wide logging: attach a default console handler via basicConfig
# and make sure the root logger emits INFO-level messages.
logger = logging.getLogger()
logging.basicConfig(level=logging.INFO)
logger.setLevel(logging.INFO)

ParmEd needs to know the GROMACS topology folder; it is usually obtained from the environment variable GMXLIB:

Function definitions

In [17]:
def find_undeclared_variables(infile):
    """Identify all variables evaluated in a Jinja2 template file.

    Parses the template without rendering it and collects every variable
    name that the template expects to be supplied from outside.

    Parameters
    ----------
    infile : str
        Path to the Jinja2 template file.

    Returns
    -------
    set of str
        Names of all undeclared (externally supplied) template variables.
    """
    env = jinja2.Environment()
    # Explicit encoding: template files are UTF-8 here; the platform's
    # locale-dependent default encoding is not reliable across machines.
    with open(infile, encoding='utf-8') as template_file:
        parsed = env.parse(template_file.read())

    undefined = jinja2.meta.find_undeclared_variables(parsed)
    return undefined
In [18]:
def memuse():
    """Quick overview on memory usage of objects in Jupyter notebook"""
    # https://stackoverflow.com/questions/40993626/list-memory-usage-in-ipython-and-jupyter
    # These are the usual ipython objects, including this one you are creating
    ipython_vars = ['In', 'Out', 'exit', 'quit', 'get_ipython', 'ipython_vars']

    # Get a sorted list of the objects and their sizes
    return sorted([(x, sys.getsizeof(globals().get(x))) for x in dir(sys.modules['__main__']) if not x.startswith('_') and x not in sys.modules and x not in ipython_vars], key=lambda x: x[1], reverse=True)

Global settings

In [19]:
# pandas display settings: show fairly large frames in full before
# truncating rows, columns, or individual cell contents.
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 16)
pd.set_option('display.max_colwidth', 256)
In [20]:
os.environ['GMXLIB'] = '/gmx_top'
In [21]:
# pmd.gromacs.GROMACS_TOPDIR = os.environ['GMXLIB']
# Point ParmEd at the same GROMACS topology directory (hard-coded here;
# could equally be taken from the GMXLIB environment variable set above).
pmd.gromacs.GROMACS_TOPDIR = '/gmx_top'
In [22]:
# prefix = '/mnt/dat/work/testuser/indenter/sandbox/20191110_packmol'
# Base directory for all data handled by this notebook.
prefix = '/mnt/dat/work'
In [23]:
work_prefix = '/mnt/dat/work/tmp'
In [24]:
# Create the scratch directory; if it already exists, just report that
# and carry on (the existing contents are reused).
try:
    os.mkdir(work_prefix)
except FileExistsError as err:
    print(err)
[Errno 17] File exists: '/mnt/dat/work/tmp'
In [25]:
os.chdir(work_prefix)
In [26]:
# The FireWorks LaunchPad: connection to the workflow server and database
# (configuration auto-discovered, e.g. from the user's FireWorks config).
lp = LaunchPad.auto_load()
# FilePad behaves analogous to LaunchPad, but grants access to the file storage.
fp = FilePad.auto_load()

Conversion from LAMMPS data format to PDB

The following bash / tcl snippet converts a LAMMPS data file to PDB, assigning the desired names as mapped in a yaml file

#!/bin/bash
# echo "package require jlhvmd; jlh lmp2pdb indenter.lammps indenter.pdb" | vmd -eofexit
vmd -eofexit << 'EOF'
package require jlhvmd
topo readlammpsdata indenter.lammps
jlh type2name SDS_type2name.yaml
jlh name2res  SDS_name2res.yaml
set sel [atomselect top all]
$sel writepdb indenter.pdb
EOF

pdb_chain.py indenter.pdb > indenter_wo_chainid.pdb
pdb_reres_by_atom_9999.py indenter_wo_chainid.pdb > indenter_reres.pdb

Requires

Overview

Overview on projects in database

In [27]:
query = {'metadata.datetime': {'$gt': '2020'} }
In [28]:
fp.filepad.count_documents(query)
Out[28]:
6134
In [29]:
# Group all matching documents by project and report, per project, the
# number of objects and the time span (earliest/latest datetime) covered.
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 'project': '$metadata.project' },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # pull 'project' field up in hierarchy
        "$addFields": { 
            "project": "$_id.project",
        },
    },
    {  # drop nested '_id.project'
        "$project": { 
            "_id": False 
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [c for c in cursor]
res_df = pd.DataFrame(data=res) # pandas Dataframe is just nice for printing in notebook
In [30]:
res_df
Out[30]:
object_count earliest latest project
0 1674 2020-07-19 22:52:59.921460 2020-07-19 22:53:01.334582 2020-07-19-passivation
1 278 2020-07-19 00:39:08.874821 2020-07-19 00:39:09.172045 2020-07-19-passivation-trial-a
2 278 2020-07-19 00:24:15.492750 2020-07-19 00:24:15.791253 2020-07-18-passivation-trial-c
3 152 2020-07-18 22:33:35.817622 2020-07-18 22:33:36.072923 2020-07-18-passivation-trial-b
4 278 2020-07-18 17:24:37.304667 2020-07-18 17:24:37.643817 2020-07-18-passivation-trial
5 278 2020-07-18 15:13:57.808691 2020-07-18 15:13:58.077915 2020-08-18-passivation-trial
6 121 2020-07-17 21:44:13.197047 2020-07-17 21:44:13.508164 2020-07-17-passivation-trial
7 1386 2020-07-09 01:30:15.276530 2020-07-09 01:30:16.819138 2020-07-09-passivation
8 350 2020-07-08 13:31:27.106684 2020-07-08 14:02:41.994152 2020-07-08-passiv-trial-revisited
9 236 2020-07-02 19:37:19.035754 2020-07-02 19:37:19.770123 2020-07-03-passiv-trial-revisited
10 54 2020-07-02 03:49:00.103034 2020-07-02 03:49:00.325450 2020-07-01-passiv-trial-revisited
11 6 2020-05-11 15:34:17.973141 2020-05-11 15:34:18.054752 2020-05-11-2nd-passiv-trial
12 64 2020-05-11 00:46:35.427907 2020-05-11 00:46:35.637375 2020-05-11-passiv-trial
13 76 2020-05-09 23:23:58.405636 2020-05-09 23:23:58.629348 2020-05-10-passiv-trial
14 54 2020-05-09 18:06:29.244729 2020-05-09 18:06:29.470488 2020-05-09-parametric
15 276 2020-05-08 17:40:43.151560 2020-05-08 20:09:19.785964 2020-05-08-final
16 8 2020-05-08 16:46:54.125267 2020-05-08 16:46:55.061579 2020-05-08-dtool-trial
17 102 2020-05-06 19:57:34.090873 2020-05-06 23:02:33.696573 2020-05-06-indenter-passivation-trial
18 4 2020-05-04 19:56:27.458671 2020-05-04 19:56:27.458686 2020-05-04-gmx-em-dtool-trial
19 1 2020-05-04 17:47:46.398832 2020-05-04 17:47:46.398832 2020-04-29-gmx-nvt-trial
20 15 2020-04-29 20:03:40.694070 2020-04-29 21:43:41.499280 2020-04-23-gmx-nvt-trial
21 102 2020-04-23 00:42:50.738462 2020-04-23 12:29:04.927501 2020-04-23-indenter-passivation-trial
22 6 2020-04-22 23:52:50.724373 2020-04-22 23:52:50.724774 2020-04-22-gmx-nvt-trial
23 30 2020-04-22 20:12:52.975107 2020-04-22 20:12:52.992097 2020-04-22-intermediate-trial
24 12 2020-04-22 15:35:33.694291 2020-04-22 19:37:27.812809 2020-04-22-trajectory-rendering-trial
25 23 2020-04-22 00:22:22.239748 2020-04-22 00:22:22.241080 2020-04-21-gmx-chain-wf-trial
26 2 2020-04-21 21:08:51.961011 2020-04-21 21:08:51.961024 2020-04-21-gmx-solvate-trial
27 42 2020-04-21 17:33:16.919815 2020-04-21 23:19:49.893920 2020-04-21-intermediate-trial
28 7 2020-04-21 15:35:02.223005 2020-04-21 15:35:02.223026 2020-04-21-gmx-pull-trial
29 15 2020-04-15 12:12:58.569894 2020-04-21 14:39:06.617272 2020-04-15-gmx-pull-prep-trial
30 74 2020-04-15 01:58:21.918222 2020-04-21 13:38:43.462681 2020-04-15-intermediate-trial
31 4 2020-04-15 00:28:04.783910 2020-04-15 00:28:04.783995 2020-04-15-gmx-em-trial
32 3 2020-04-14 21:19:31.298223 2020-04-14 21:19:31.298232 2020-04-14-gmx-prep-trial
33 2 2020-04-14 17:14:15.144625 2020-04-14 20:23:20.587799 2020-04-14-packmol-trial
34 2 2020-04-03 01:25:09.142195 2020-04-07 16:18:42.232568 2020-04-02-surfactant-molecule-measures-trial
35 5 2020-04-02 21:18:13.804918 2020-04-12 20:56:30.319559 2020-03-31-local-trial
36 39 2020-03-25 12:11:11.906332 2020-03-25 22:47:26.604903 juwels-pull-2020-03-17
37 8 2020-03-13 12:33:31.922163 2020-03-13 12:33:31.922497 juwels-afm-probe-solvation-trial-a-2020-03-13
38 10 2020-03-12 12:31:29.329732 2020-03-12 15:55:54.013161 juwels-gromacs-em-2020-03-12
39 42 2020-03-11 18:45:59.775161 2020-03-12 00:15:43.047626 juwels-gromacs-prep-2020-03-11
40 15 2020-03-09 18:13:09.455387 2020-03-09 23:30:45.006483 juwels-packmol-2020-03-09

Overview on steps in project

In [31]:
project_id = '2020-07-19-passivation'
In [32]:
# Queries to the database are plain dictionaries; this one selects
# every object belonging to the chosen project.
query = {
    'metadata.project': project_id,
}
In [33]:
# Use underlying MongoDB functionality to check the total number of
# documents matching the query.
fp.filepad.count_documents(query)
Out[33]:
1687
In [34]:
# Count objects per workflow step and determine the time span each step covers.
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique workflow step
        "$group": { 
            "_id": { 
                'step': '$metadata.step',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# flatten the nested '_id' group key into top-level columns
res = [ {**c['_id'], **c} for c in cursor]
columns = ['step', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [35]:
res_df
Out[35]:
step earliest latest object_count
0 GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad 2020-07-19 22:53:00.116576 2020-07-19 22:53:01.334582 252
1 GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad 2020-07-19 22:53:00.105618 2020-07-19 22:53:01.323013 288
2 GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad 2020-07-19 22:53:00.094225 2020-07-19 22:53:01.311597 252
3 GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad 2020-07-19 22:53:00.082429 2020-07-19 22:53:01.300371 240
4 GromacsSolvate:ProcessAnalyzeAndVisualize:push_filepad 2020-07-19 22:53:00.070735 2020-07-19 22:53:01.289069 24
5 GromacsPull:ProcessAnalyzeAndVisualize:push_filepad 2020-07-19 22:53:00.070520 2020-07-19 22:53:01.288914 276
6 GromacsPullPrep:ProcessAnalyzeAndVisualize:push_filepad 2020-07-19 22:53:00.059439 2020-07-19 22:53:01.277881 48
7 GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad 2020-07-19 22:53:00.059082 2020-07-19 22:53:01.277341 228
8 GromacsPrep:ProcessAnalyzeAndVisualize:push_filepad 2020-07-19 22:53:00.048039 2020-07-19 22:53:01.264820 36
9 SphericalSurfactantPacking:ProcessAnalyzeAndVisualize:push_filepad 2020-07-19 22:53:00.047747 2020-07-19 22:53:01.264537 24
10 PackingConstraintSpheres:ProcessAnalyzeAndVisualize:push_filepad 2020-07-19 22:53:00.006355 2020-07-19 22:53:00.006362 2
11 SurfactantMoleculeMeasures:ProcessAnalyzeAndVisualize:push_filepad 2020-07-19 22:52:59.975511 2020-07-19 22:52:59.975523 2
12 IndenterBoundingSphere:ProcessAnalyzeAndVisualize:push_filepad 2020-07-19 22:52:59.921460 2020-07-19 22:52:59.921476 2
13 IndenterBoundingSphere:IndenterBoundingSphereMain:push_infiles None None 1
14 GromacsEnergyMinimization:GromacsEnergyMinimizationMain:push_infiles None None 1
15 GromacsRelaxation:GromacsRelaxationEquilibrationMain:push_infiles None None 1
16 SphericalSurfactantPacking:push_infiles None None 2
17 GromacsEnergyMinimizationAfterSolvation:GromacsEnergyMinimizationAfterSolvationMain:push_infiles None None 1
18 SurfactantMoleculeMeasures:SurfactantMoleculeMeasuresMain:push_infiles None None 1
19 GromacsNPTEquilibration:GromacsNPTEquilibrationMain:push_infiles None None 1
20 GromacsEnergyMinimization:GromacsTrajectoryVisualization:push_infiles None None 2
21 GromacsPullPrep:GromacsPullPrepMain:push_infiles None None 2
22 GromacsNVTEquilibration:GromacsNVTEquilibrationMain:push_infiles None None 1
In [36]:
res_df['step'].values
Out[36]:
array(['GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad',
       'GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad',
       'GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad',
       'GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad',
       ' GromacsSolvate:ProcessAnalyzeAndVisualize:push_filepad',
       'GromacsPull:ProcessAnalyzeAndVisualize:push_filepad',
       'GromacsPullPrep:ProcessAnalyzeAndVisualize:push_filepad',
       'GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad',
       'GromacsPrep:ProcessAnalyzeAndVisualize:push_filepad',
       'SphericalSurfactantPacking:ProcessAnalyzeAndVisualize:push_filepad',
       'PackingConstraintSpheres:ProcessAnalyzeAndVisualize:push_filepad',
       'SurfactantMoleculeMeasures:ProcessAnalyzeAndVisualize:push_filepad',
       'IndenterBoundingSphere:ProcessAnalyzeAndVisualize:push_filepad',
       'IndenterBoundingSphere:IndenterBoundingSphereMain:push_infiles',
       'GromacsEnergyMinimization:GromacsEnergyMinimizationMain:push_infiles',
       'GromacsRelaxation:GromacsRelaxationEquilibrationMain:push_infiles',
       'SphericalSurfactantPacking:push_infiles',
       'GromacsEnergyMinimizationAfterSolvation:GromacsEnergyMinimizationAfterSolvationMain:push_infiles',
       'SurfactantMoleculeMeasures:SurfactantMoleculeMeasuresMain:push_infiles',
       'GromacsNPTEquilibration:GromacsNPTEquilibrationMain:push_infiles',
       'GromacsEnergyMinimization:GromacsTrajectoryVisualization:push_infiles',
       'GromacsPullPrep:GromacsPullPrepMain:push_infiles',
       'GromacsNVTEquilibration:GromacsNVTEquilibrationMain:push_infiles'],
      dtype=object)

Overview on objects in project

In [37]:
# Queries to the database are plain dictionaries; this one selects
# every object belonging to the chosen project.
query = {
    'metadata.project': project_id,
}
In [38]:
# Use underlying MongoDB functionality to check the total number of
# documents matching the query.
fp.filepad.count_documents(query)
Out[38]:
1687
In [39]:
# Check files grouped by 'metadata.type', 'metadata.name' and 'metadata.step':
# count objects per (type, name, step) triple and the time span each covers.
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique (type, name, step) combination
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
                'name': '$metadata.name',
                'step': '$metadata.step',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# flatten the nested '_id' group key into top-level columns
res = [ {**c['_id'], **c} for c in cursor]
columns = ['type', 'step', 'name', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [40]:
res_df
Out[40]:
type step name earliest latest object_count
0 mp4_file GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116638 2020-07-19 22:53:01.334582 12
1 surfactant_tail_rmsd GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116633 2020-07-19 22:53:01.334580 12
2 surfactant_head_rmsd GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116631 2020-07-19 22:53:01.334577 12
3 substrate_rmsd GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116628 2020-07-19 22:53:01.334575 12
4 counterion_rmsd GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116626 2020-07-19 22:53:01.334569 12
5 surfactant_tail_surfactant_tail_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116623 2020-07-19 22:53:01.334567 12
6 surfactant_head_surfactant_tail_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116621 2020-07-19 22:53:01.334565 12
7 surfactant_head_surfactant_head_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116619 2020-07-19 22:53:01.334562 12
8 substrate_surfactant_tail_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116616 2020-07-19 22:53:01.334560 12
9 substrate_surfactant_head_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116614 2020-07-19 22:53:01.334558 12
10 substrate_substrate_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116612 2020-07-19 22:53:01.334556 12
11 counterion_surfactant_tail_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116609 2020-07-19 22:53:01.334554 12
12 counterion_surfactant_head_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116607 2020-07-19 22:53:01.334551 12
13 counterion_substrate_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116604 2020-07-19 22:53:01.334549 12
14 counterion_counterion_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116602 2020-07-19 22:53:01.334547 12
15 index_file GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116600 2020-07-19 22:53:01.334544 12
16 topology_file GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116597 2020-07-19 22:53:01.334542 12
17 data_file GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116587 2020-07-19 22:53:01.334539 12
18 trajectory_file GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116585 2020-07-19 22:53:01.334537 12
19 energy_file GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116582 2020-07-19 22:53:01.334534 12
20 log_file GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.116576 2020-07-19 22:53:01.334529 12
21 mp4_file GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105677 2020-07-19 22:53:01.323013 13
22 surfactant_tail_rmsd GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105675 2020-07-19 22:53:01.323011 13
23 surfactant_head_rmsd GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105671 2020-07-19 22:53:01.323009 13
24 substrate_rmsd GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105667 2020-07-19 22:53:01.323006 13
25 counterion_rmsd GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105660 2020-07-19 22:53:01.323004 13
26 surfactant_tail_surfactant_tail_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105657 2020-07-19 22:53:01.323001 13
27 surfactant_head_surfactant_tail_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105655 2020-07-19 22:53:01.322999 14
28 surfactant_head_surfactant_head_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105653 2020-07-19 22:53:01.322997 14
29 substrate_surfactant_tail_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105650 2020-07-19 22:53:01.322995 14
30 substrate_surfactant_head_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105648 2020-07-19 22:53:01.322992 14
31 substrate_substrate_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105646 2020-07-19 22:53:01.322990 14
32 counterion_surfactant_tail_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105643 2020-07-19 22:53:01.322988 14
33 counterion_surfactant_head_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105641 2020-07-19 22:53:01.322986 14
34 counterion_substrate_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105638 2020-07-19 22:53:01.322983 14
35 counterion_counterion_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105636 2020-07-19 22:53:01.322981 14
36 index_file GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105634 2020-07-19 22:53:01.322979 14
37 topology_file GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105631 2020-07-19 22:53:01.322976 14
38 data_file GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105629 2020-07-19 22:53:01.322974 14
39 trajectory_file GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105626 2020-07-19 22:53:01.322972 14
40 energy_file GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105623 2020-07-19 22:53:01.322969 14
41 log_file GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.105618 2020-07-19 22:53:01.322963 14
42 mp4_file GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094277 2020-07-19 22:53:01.311597 12
43 surfactant_tail_rmsd GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094274 2020-07-19 22:53:01.311594 12
44 surfactant_head_rmsd GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094272 2020-07-19 22:53:01.311588 12
45 substrate_rmsd GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094270 2020-07-19 22:53:01.311585 12
46 counterion_rmsd GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094267 2020-07-19 22:53:01.311583 12
47 surfactant_tail_surfactant_tail_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094265 2020-07-19 22:53:01.311580 12
48 surfactant_head_surfactant_tail_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094262 2020-07-19 22:53:01.311578 12
49 surfactant_head_surfactant_head_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094260 2020-07-19 22:53:01.311575 12
50 substrate_surfactant_tail_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094258 2020-07-19 22:53:01.311573 12
51 substrate_surfactant_head_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094255 2020-07-19 22:53:01.311571 12
52 substrate_substrate_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094253 2020-07-19 22:53:01.311568 12
53 counterion_surfactant_tail_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094251 2020-07-19 22:53:01.311566 12
54 counterion_surfactant_head_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094248 2020-07-19 22:53:01.311563 12
55 counterion_substrate_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094246 2020-07-19 22:53:01.311561 12
56 counterion_counterion_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094244 2020-07-19 22:53:01.311558 12
57 index_file GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094241 2020-07-19 22:53:01.311556 12
58 topology_file GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094239 2020-07-19 22:53:01.311553 12
59 data_file GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094236 2020-07-19 22:53:01.311551 12
60 trajectory_file GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094234 2020-07-19 22:53:01.311548 12
61 energy_file GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094231 2020-07-19 22:53:01.311545 12
62 log_file GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.094225 2020-07-19 22:53:01.311533 12
63 mp4_file GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082476 2020-07-19 22:53:01.300371 12
64 surfactant_tail_rmsd GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082474 2020-07-19 22:53:01.300369 12
65 surfactant_head_rmsd GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082471 2020-07-19 22:53:01.300367 12
66 substrate_rmsd GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082469 2020-07-19 22:53:01.300364 12
67 counterion_rmsd GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082467 2020-07-19 22:53:01.300362 12
68 surfactant_tail_surfactant_tail_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082464 2020-07-19 22:53:01.300360 12
69 surfactant_head_surfactant_tail_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082462 2020-07-19 22:53:01.300357 12
70 surfactant_head_surfactant_head_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082460 2020-07-19 22:53:01.300355 12
71 substrate_surfactant_tail_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082458 2020-07-19 22:53:01.300353 12
72 substrate_surfactant_head_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082455 2020-07-19 22:53:01.300351 12
73 substrate_substrate_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082453 2020-07-19 22:53:01.300348 12
74 counterion_surfactant_tail_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082451 2020-07-19 22:53:01.300346 12
75 counterion_surfactant_head_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082449 2020-07-19 22:53:01.300344 12
76 counterion_substrate_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082446 2020-07-19 22:53:01.300341 12
77 counterion_counterion_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082444 2020-07-19 22:53:01.300339 12
78 topology_file GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082442 2020-07-19 22:53:01.300337 12
79 data_file GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082439 2020-07-19 22:53:01.300334 12
80 trajectory_file GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082437 2020-07-19 22:53:01.300332 12
81 energy_file GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082434 2020-07-19 22:53:01.300329 12
82 log_file GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.082429 2020-07-19 22:53:01.300323 12
83 topology_file GromacsSolvate:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070740 2020-07-19 22:53:01.289069 12
84 data_file GromacsSolvate:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070735 2020-07-19 22:53:01.289065 12
85 mp4_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070575 2020-07-19 22:53:01.288914 12
86 surfactant_tail_rmsd GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070573 2020-07-19 22:53:01.288912 12
87 surfactant_head_rmsd GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070571 2020-07-19 22:53:01.288908 12
88 substrate_rmsd GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070568 2020-07-19 22:53:01.288906 12
89 counterion_rmsd GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070566 2020-07-19 22:53:01.288904 12
90 surfactant_tail_surfactant_tail_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070564 2020-07-19 22:53:01.288901 12
91 surfactant_head_surfactant_tail_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070561 2020-07-19 22:53:01.288899 12
92 surfactant_head_surfactant_head_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070559 2020-07-19 22:53:01.288896 12
93 substrate_surfactant_tail_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070557 2020-07-19 22:53:01.288894 12
94 substrate_surfactant_head_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070554 2020-07-19 22:53:01.288891 12
95 substrate_substrate_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070552 2020-07-19 22:53:01.288889 12
96 counterion_surfactant_tail_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070550 2020-07-19 22:53:01.288883 12
97 counterion_surfactant_head_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070547 2020-07-19 22:53:01.288880 12
98 counterion_substrate_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070545 2020-07-19 22:53:01.288878 12
99 counterion_counterion_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070543 2020-07-19 22:53:01.288876 12
100 topology_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070541 2020-07-19 22:53:01.288873 12
101 pullx_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070538 2020-07-19 22:53:01.288871 12
102 pullf_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070536 2020-07-19 22:53:01.288869 12
103 data_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070534 2020-07-19 22:53:01.288867 12
104 compressed_trajectory_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070531 2020-07-19 22:53:01.288864 12
105 trajectory_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070529 2020-07-19 22:53:01.288862 12
106 energy_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070526 2020-07-19 22:53:01.288859 12
107 log_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.070520 2020-07-19 22:53:01.288853 12
108 input_file GromacsPullPrep:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059448 2020-07-19 22:53:01.277881 12
109 index_file GromacsPullPrep:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059446 2020-07-19 22:53:01.277878 12
110 topology_file GromacsPullPrep:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059443 2020-07-19 22:53:01.277873 12
111 data_file GromacsPullPrep:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059439 2020-07-19 22:53:01.277866 12
112 mp4_file GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059128 2020-07-19 22:53:01.277341 12
113 surfactant_tail_rmsd GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059126 2020-07-19 22:53:01.277336 12
114 surfactant_head_rmsd GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059123 2020-07-19 22:53:01.277331 12
115 substrate_rmsd GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059121 2020-07-19 22:53:01.277327 12
116 counterion_rmsd GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059119 2020-07-19 22:53:01.277323 12
117 surfactant_tail_surfactant_tail_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059116 2020-07-19 22:53:01.277319 12
118 surfactant_head_surfactant_tail_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059114 2020-07-19 22:53:01.277316 12
119 surfactant_head_surfactant_head_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059112 2020-07-19 22:53:01.277312 12
120 substrate_surfactant_tail_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059110 2020-07-19 22:53:01.277309 12
121 substrate_surfactant_head_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059107 2020-07-19 22:53:01.277305 12
122 substrate_substrate_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059105 2020-07-19 22:53:01.277301 12
123 counterion_surfactant_tail_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059103 2020-07-19 22:53:01.277298 12
124 counterion_surfactant_head_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059100 2020-07-19 22:53:01.277294 12
125 counterion_substrate_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059098 2020-07-19 22:53:01.277290 12
126 counterion_counterion_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059096 2020-07-19 22:53:01.277286 12
127 data_file GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059094 2020-07-19 22:53:01.277281 12
128 trajectory_file GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059091 2020-07-19 22:53:01.277279 12
129 energy_file GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059088 2020-07-19 22:53:01.277276 12
130 log_file GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.059082 2020-07-19 22:53:01.277270 12
131 restraint_file GromacsPrep:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.048046 2020-07-19 22:53:01.264820 12
132 topology_file GromacsPrep:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.048043 2020-07-19 22:53:01.264817 12
133 data_file GromacsPrep:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.048039 2020-07-19 22:53:01.264814 12
134 png_file SphericalSurfactantPacking:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.047755 2020-07-19 22:53:01.264537 12
135 data_file SphericalSurfactantPacking:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.047747 2020-07-19 22:53:01.264528 12
136 png_file PackingConstraintSpheres:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.006362 2020-07-19 22:53:00.006362 1
137 indenter_file PackingConstraintSpheres:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:53:00.006355 2020-07-19 22:53:00.006355 1
138 png_file SurfactantMoleculeMeasures:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:52:59.975523 2020-07-19 22:52:59.975523 1
139 indenter_file SurfactantMoleculeMeasures:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:52:59.975511 2020-07-19 22:52:59.975511 1
140 png_file IndenterBoundingSphere:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:52:59.921476 2020-07-19 22:52:59.921476 1
141 indenter_file IndenterBoundingSphere:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-19 22:52:59.921460 2020-07-19 22:52:59.921460 1
142 input GromacsNPTEquilibration:GromacsNPTEquilibrationMain:push_infiles npt.mdp None None 1
143 input GromacsPullPrep:GromacsPullPrepMain:push_infiles pull.mdp.template None None 1
144 template SphericalSurfactantPacking:push_infiles sphere.inp.template None None 1
145 data SphericalSurfactantPacking:push_infiles 1_NA.pdb None None 1
146 input GromacsPullPrep:GromacsPullPrepMain:push_infiles sys.top.template None None 1
147 input GromacsRelaxation:GromacsRelaxationEquilibrationMain:push_infiles relax.mdp None None 1
148 input GromacsNVTEquilibration:GromacsNVTEquilibrationMain:push_infiles nvt.mdp None None 1
149 input GromacsEnergyMinimization:GromacsEnergyMinimizationMain:push_infiles em.mdp None None 1
150 surfactant_file SurfactantMoleculeMeasures:SurfactantMoleculeMeasuresMain:push_infiles 1_SDS.pdb None None 1
151 indenter_file IndenterBoundingSphere:IndenterBoundingSphereMain:push_infiles AU_111_r_25.pdb None None 1
152 input GromacsEnergyMinimization:GromacsTrajectoryVisualization:push_infiles movie.pml.template None None 1
153 input GromacsEnergyMinimization:GromacsTrajectoryVisualization:push_infiles renumber_png.sh None None 1
154 input GromacsEnergyMinimizationAfterSolvation:GromacsEnergyMinimizationAfterSolvationMain:push_infiles em_solvated.mdp None None 1

Overview on images by distinct steps

In [41]:
# restrict the overview to PNG images belonging to the current project
query = {
    "metadata.project": project_id,
    "metadata.type": "png_file",
}
In [42]:
# Count all PNG image documents matching the query via the underlying
# MongoDB collection; the bare expression displays the count.
fp.filepad.count_documents(query)
Out[42]:
15
In [43]:
# Overview of PNG images, grouped by 'metadata.type', 'metadata.name'
# and 'metadata.step'.
aggregation_pipeline = [
    # keep only documents matching the PNG query
    {"$match": query},
    # one group per distinct (type, name, step) triple
    {
        "$group": {
            "_id": {
                "type": "$metadata.type",
                "name": "$metadata.name",
                "step": "$metadata.step",
            },
            "object_count": {"$sum": 1},  # number of data sets per group
            "earliest": {"$min": "$metadata.datetime"},
            "latest": {"$max": "$metadata.datetime"},
        },
    },
    # most recently created groups first
    {"$sort": {"earliest": pymongo.DESCENDING}},
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# merge the grouping key into each result document, then tabulate
res = [{**doc["_id"], **doc} for doc in cursor]
columns = ["step", "type", "name", "earliest", "latest", "object_count"]
res_df = pd.DataFrame(data=res, columns=columns)  # DataFrame renders nicely in the notebook
In [44]:
res_df
Out[44]:
step type name earliest latest object_count
0 SphericalSurfactantPacking:ProcessAnalyzeAndVisualize:push_filepad png_file NaN 2020-07-19 22:53:00.047755 2020-07-19 22:53:01.264537 12
1 PackingConstraintSpheres:ProcessAnalyzeAndVisualize:push_filepad png_file NaN 2020-07-19 22:53:00.006362 2020-07-19 22:53:00.006362 1
2 SurfactantMoleculeMeasures:ProcessAnalyzeAndVisualize:push_filepad png_file NaN 2020-07-19 22:52:59.975523 2020-07-19 22:52:59.975523 1
3 IndenterBoundingSphere:ProcessAnalyzeAndVisualize:push_filepad png_file NaN 2020-07-19 22:52:59.921476 2020-07-19 22:52:59.921476 1
In [45]:
res_df["step"][0]
Out[45]:
'SphericalSurfactantPacking:ProcessAnalyzeAndVisualize:push_filepad'

Packing visualization

Indenter bounding sphere

In [46]:
# select PNG renderings produced by the IndenterBoundingSphere step
query = {
    "metadata.project": project_id,
    "metadata.type": "png_file",
    "metadata.step": {"$regex": "IndenterBoundingSphere"},
}
In [47]:
# Count matching bounding-sphere PNG documents via the underlying
# MongoDB collection; the bare expression displays the count.
fp.filepad.count_documents(query)
Out[47]:
1
In [48]:
# Fetch the matching PNG files, most recent first, and wrap them in
# IPython Image objects for inline display.
aggregation_pipeline = [
    {"$match": query},
    # most recent files first
    {"$sort": {"metadata.datetime": pymongo.DESCENDING}},
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_list = []
for c in cursor:
    # retrieve raw file content from GridFS by its id
    content, metadata = fp.get_file_by_id(c["gfs_id"])
    with tempfile.NamedTemporaryFile(suffix='.png') as tmp:
        tmp.write(content)
        # flush so the bytes are on disk before Image re-reads the file by name;
        # without this, buffered content may be missing and the image truncated
        tmp.flush()
        obj_list.append(Image(filename=tmp.name))
    print('.',end='')  # one dot per processed file as progress indicator
.
In [49]:
# render every retrieved image inline; display() accepts multiple objects
display(*obj_list)

Surfactant measures

In [50]:
# select PNG renderings produced by the SurfactantMoleculeMeasures step
query = {
    "metadata.project": project_id,
    "metadata.type": "png_file",
    "metadata.step": {"$regex": "SurfactantMoleculeMeasures"},
}
In [51]:
# Count matching surfactant-measure PNG documents via the underlying
# MongoDB collection; the bare expression displays the count.
fp.filepad.count_documents(query)
Out[51]:
1
In [52]:
# Fetch the matching PNG files, most recent first, and wrap them in
# IPython Image objects for inline display.
aggregation_pipeline = [
    {"$match": query},
    # most recent files first
    {"$sort": {"metadata.datetime": pymongo.DESCENDING}},
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_list = []
for c in cursor:
    # retrieve raw file content from GridFS by its id
    content, metadata = fp.get_file_by_id(c["gfs_id"])
    with tempfile.NamedTemporaryFile(suffix='.png') as tmp:
        tmp.write(content)
        # flush so the bytes are on disk before Image re-reads the file by name;
        # without this, buffered content may be missing and the image truncated
        tmp.flush()
        obj_list.append(Image(filename=tmp.name))
    print('.',end='')  # one dot per processed file as progress indicator
.
In [53]:
obj_list[0]
Out[53]:

Packing constraints

In [54]:
# select PNG renderings produced by the PackingConstraintSpheres step
query = {
    "metadata.project": project_id,
    "metadata.type": "png_file",
    "metadata.step": {"$regex": "PackingConstraintSpheres"},
}
In [55]:
# Count matching packing-constraint PNG documents via the underlying
# MongoDB collection; the bare expression displays the count.
fp.filepad.count_documents(query)
Out[55]:
1
In [56]:
# Fetch the matching PNG files, most recent first, and wrap them in
# IPython Image objects for inline display.
aggregation_pipeline = [
    {"$match": query},
    # most recent files first
    {"$sort": {"metadata.datetime": pymongo.DESCENDING}},
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_list = []
for c in cursor:
    # retrieve raw file content from GridFS by its id
    content, metadata = fp.get_file_by_id(c["gfs_id"])
    with tempfile.NamedTemporaryFile(suffix='.png') as tmp:
        tmp.write(content)
        # flush so the bytes are on disk before Image re-reads the file by name;
        # without this, buffered content may be missing and the image truncated
        tmp.flush()
        obj_list.append(Image(filename=tmp.name))
    print('.',end='')  # one dot per processed file as progress indicator
.
In [57]:
obj_list[0]
Out[57]:

Packed film

In [58]:
# select PNG renderings produced by the SphericalSurfactantPacking step
query = {
    "metadata.project": project_id,
    "metadata.type": "png_file",
    "metadata.step": {"$regex": "SphericalSurfactantPacking"},
}
In [59]:
# Count matching packed-film PNG documents via the underlying
# MongoDB collection; the bare expression displays the count.
fp.filepad.count_documents(query)
Out[59]:
12
In [60]:
# Fetch the matching PNG files, most recent first, and wrap them in
# IPython Image objects for inline display.
aggregation_pipeline = [
    {"$match": query},
    # most recent files first
    {"$sort": {"metadata.datetime": pymongo.DESCENDING}},
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_list = []
for c in cursor:
    # retrieve raw file content from GridFS by its id
    content, metadata = fp.get_file_by_id(c["gfs_id"])
    with tempfile.NamedTemporaryFile(suffix='.png') as tmp:
        tmp.write(content)
        # flush so the bytes are on disk before Image re-reads the file by name;
        # without this, buffered content may be missing and the image truncated
        tmp.flush()
        obj_list.append(Image(filename=tmp.name))
    print('.',end='')  # one dot per processed file as progress indicator
............
In [61]:
# render every retrieved image inline; display() accepts multiple objects
display(*obj_list)

Energy minimization analysis

Overview on objects in step

In [62]:
# FilePad queries are plain MongoDB-style filter dictionaries.
# Select all output objects of the energy minimization post-processing step.
query = {
    "metadata.project": project_id,
    "metadata.step": "GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad",
}
In [63]:
# Count all documents of the energy minimization step via the underlying
# MongoDB collection; the bare expression displays the count.
fp.filepad.count_documents(query)
Out[63]:
228
In [64]:
# Overview of (possibly degenerate) file objects within the step,
# grouped by 'metadata.type' and 'metadata.name'.
aggregation_pipeline = [
    # keep only documents of the selected step
    {"$match": query},
    # one group per distinct (type, name) pair
    {
        "$group": {
            "_id": {
                "type": "$metadata.type",
                "name": "$metadata.name",
            },
            "object_count": {"$sum": 1},  # number of data sets per group
            "earliest": {"$min": "$metadata.datetime"},
            "latest": {"$max": "$metadata.datetime"},
        },
    },
    # most recently created groups first
    {"$sort": {"earliest": pymongo.DESCENDING}},
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# merge the grouping key into each result document, then tabulate
res = [{**doc["_id"], **doc} for doc in cursor]
columns = ["type", "name", "earliest", "latest", "object_count"]
res_df = pd.DataFrame(data=res, columns=columns)  # DataFrame renders nicely in the notebook
In [65]:
res_df
Out[65]:
type name earliest latest object_count
0 mp4_file NaN 2020-07-19 22:53:00.059128 2020-07-19 22:53:01.277341 12
1 surfactant_tail_rmsd NaN 2020-07-19 22:53:00.059126 2020-07-19 22:53:01.277336 12
2 surfactant_head_rmsd NaN 2020-07-19 22:53:00.059123 2020-07-19 22:53:01.277331 12
3 substrate_rmsd NaN 2020-07-19 22:53:00.059121 2020-07-19 22:53:01.277327 12
4 counterion_rmsd NaN 2020-07-19 22:53:00.059119 2020-07-19 22:53:01.277323 12
5 surfactant_tail_surfactant_tail_rdf NaN 2020-07-19 22:53:00.059116 2020-07-19 22:53:01.277319 12
6 surfactant_head_surfactant_tail_rdf NaN 2020-07-19 22:53:00.059114 2020-07-19 22:53:01.277316 12
7 surfactant_head_surfactant_head_rdf NaN 2020-07-19 22:53:00.059112 2020-07-19 22:53:01.277312 12
8 substrate_surfactant_tail_rdf NaN 2020-07-19 22:53:00.059110 2020-07-19 22:53:01.277309 12
9 substrate_surfactant_head_rdf NaN 2020-07-19 22:53:00.059107 2020-07-19 22:53:01.277305 12
10 substrate_substrate_rdf NaN 2020-07-19 22:53:00.059105 2020-07-19 22:53:01.277301 12
11 counterion_surfactant_tail_rdf NaN 2020-07-19 22:53:00.059103 2020-07-19 22:53:01.277298 12
12 counterion_surfactant_head_rdf NaN 2020-07-19 22:53:00.059100 2020-07-19 22:53:01.277294 12
13 counterion_substrate_rdf NaN 2020-07-19 22:53:00.059098 2020-07-19 22:53:01.277290 12
14 counterion_counterion_rdf NaN 2020-07-19 22:53:00.059096 2020-07-19 22:53:01.277286 12
15 data_file NaN 2020-07-19 22:53:00.059094 2020-07-19 22:53:01.277281 12
16 trajectory_file NaN 2020-07-19 22:53:00.059091 2020-07-19 22:53:01.277279 12
17 energy_file NaN 2020-07-19 22:53:00.059088 2020-07-19 22:53:01.277276 12
18 log_file NaN 2020-07-19 22:53:00.059082 2020-07-19 22:53:01.277270 12

Global observables

In [66]:
# energy (.edr) files of the energy minimization post-processing step
query = { 
    "metadata.project": project_id,
    'metadata.step': 'GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad',  #{'$regex': 'GromacsEnergyMinimization'}
    "metadata.type": 'energy_file',
}
fp.filepad.count_documents(query)
Out[66]:
12
In [67]:
# maps desired dataframe column name -> metadata field holding the parameter value
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [68]:
# pick the latest energy file per parameter set:
# match -> sort by date, newest first -> group by parameter, keep first (= latest) gridfs id
# (named stages for consistency with the pulling-analysis cells below)
match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
aggregation_pipeline = [ match_aggregation, sort_aggregation, group_aggregation ]

cursor = fp.filepad.aggregate(aggregation_pipeline)
In [69]:
# materialize the aggregation cursor (rendered as cell output)
list(cursor)
Out[69]:
[{'_id': {'nmolecules': 350},
  'degeneracy': 1,
  'latest': '5f14c2357dc9cfbf449d1a06'},
 {'_id': {'nmolecules': 525},
  'degeneracy': 1,
  'latest': '5f14bbd47dc9cfbf449d0d38'},
 {'_id': {'nmolecules': 263},
  'degeneracy': 1,
  'latest': '5f14dec37dc9cfbf449dfa1e'},
 {'_id': {'nmolecules': 481},
  'degeneracy': 1,
  'latest': '5f14bcd27dc9cfbf449d0ee1'},
 {'_id': {'nmolecules': 306},
  'degeneracy': 1,
  'latest': '5f14d0d67dc9cfbf449d8dd9'},
 {'_id': {'nmolecules': 219},
  'degeneracy': 1,
  'latest': '5f14e7727dc9cfbf449e3415'},
 {'_id': {'nmolecules': 175},
  'degeneracy': 1,
  'latest': '5f14edae7dc9cfbf449e67b0'},
 {'_id': {'nmolecules': 131},
  'degeneracy': 1,
  'latest': '5f14f21d7dc9cfbf449e7c12'},
 {'_id': {'nmolecules': 88},
  'degeneracy': 1,
  'latest': '5f14fe407dc9cfbf449ee191'},
 {'_id': {'nmolecules': 44},
  'degeneracy': 1,
  'latest': '5f14ff647dc9cfbf449eef29'},
 {'_id': {'nmolecules': 438},
  'degeneracy': 1,
  'latest': '5f14bd2c7dc9cfbf449d0fe3'},
 {'_id': {'nmolecules': 394},
  'degeneracy': 1,
  'latest': '5f14bf377dc9cfbf449d121b'}]
In [70]:
res_mi_list = []

# latest energy file per parameter set: match -> newest-first sort -> group
aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    with tempfile.NamedTemporaryFile(suffix='.edr') as tmp:
        tmp.write(content)
        tmp.flush()  # panedr re-opens the file by name; unflushed buffers would truncate it
        em_df = panedr.edr_to_df(tmp.name)

        # prepend the parameter values (here: nmolecules) as extra index levels
        mi = pd.MultiIndex.from_product(
            [c["_id"].values(), em_df.index],
            names=[*c["_id"].keys(), 'step'])
        em_mi_df = em_df.set_index(mi)
        res_mi_list.append(em_mi_df)
    print('.', end='')
print('')

res_mi_df = pd.concat(res_mi_list)
res_df = res_mi_df.reset_index()
............
In [71]:
# multi-indexed (nmolecules, step) energy observables (rendered as cell output)
res_mi_df
Out[71]:
Time Bond U-B Proper Dih. LJ-14 Coulomb-14 LJ (SR) Coulomb (SR) ... Pres-YX Pres-YY Pres-YZ Pres-ZX Pres-ZY Pres-ZZ #Surf*SurfTen T-rest
nmolecules step
175 0.0 0.0 70977.585938 41767.292969 6486.596680 4012.422852 46502.460938 -586426.3125 -73249.890625 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1.0 1.0 70512.937500 41704.984375 6486.666504 4006.664307 46499.429688 -586434.1250 -73255.750000 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2.0 2.0 69955.039062 41630.210938 6486.758301 3999.720459 46495.757812 -586443.4375 -73262.828125 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3.0 3.0 69285.273438 41540.414062 6486.883301 3991.342285 46491.343750 -586454.6250 -73271.359375 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4.0 4.0 68480.921875 41432.589844 6487.045898 3981.211426 46486.011719 -586467.9375 -73281.664062 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
525 9994.0 9994.0 783.403870 40738.222656 14467.318359 3326.141357 135453.031250 -600861.2500 -381380.687500 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
9995.0 9995.0 783.662354 40741.503906 14467.341797 3325.624512 135452.625000 -600862.6875 -381382.312500 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
9997.0 9997.0 782.888550 40739.394531 14467.355469 3325.936279 135452.843750 -600862.0000 -381381.843750 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
9998.0 9998.0 784.021301 40741.851562 14467.424805 3325.574219 135452.546875 -600863.4375 -381383.937500 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
9999.0 9999.0 782.983643 40738.820312 14467.438477 3326.022217 135452.859375 -600862.5000 -381383.125000 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

93789 rows × 31 columns

In [72]:
# flat (reset-index) variant of the table above (rendered as cell output)
res_df
Out[72]:
nmolecules step Time Bond U-B Proper Dih. LJ-14 Coulomb-14 ... Pres-YX Pres-YY Pres-YZ Pres-ZX Pres-ZY Pres-ZZ #Surf*SurfTen T-rest
0 175 0.0 0.0 70977.585938 41767.292969 6486.596680 4012.422852 46502.460938 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 175 1.0 1.0 70512.937500 41704.984375 6486.666504 4006.664307 46499.429688 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 175 2.0 2.0 69955.039062 41630.210938 6486.758301 3999.720459 46495.757812 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 175 3.0 3.0 69285.273438 41540.414062 6486.883301 3991.342285 46491.343750 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 175 4.0 4.0 68480.921875 41432.589844 6487.045898 3981.211426 46486.011719 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
93784 525 9994.0 9994.0 783.403870 40738.222656 14467.318359 3326.141357 135453.031250 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
93785 525 9995.0 9995.0 783.662354 40741.503906 14467.341797 3325.624512 135452.625000 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
93786 525 9997.0 9997.0 782.888550 40739.394531 14467.355469 3325.936279 135452.843750 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
93787 525 9998.0 9998.0 784.021301 40741.851562 14467.424805 3325.574219 135452.546875 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
93788 525 9999.0 9999.0 782.983643 40738.820312 14467.438477 3326.022217 135452.859375 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

93789 rows × 33 columns

In [73]:
# energy minimization observables and their subplot positions in a 3x2 grid
y_quantities = [
    'Potential',
    'Pressure',
    'Bond',
    'Coulomb (SR)',
    'Coul. recip.',
    ]

positions = [
    (0,0),
    (0,1),
    (1,0),
    (2,0),
    (2,1),
]
fig, ax = plt.subplots(3,2,figsize=(10,12))
# group by a scalar key (not a one-element list): `key` is then the plain
# nmolecules value, directly usable as legend label (list keys become tuples
# in pandas >= 2.0)
for key, grp in res_df.groupby('nmolecules'):
    for y_quantity, position in zip(y_quantities, positions):
        grp.plot('Time',y_quantity,ax=ax[position],label=key,title=y_quantity)

fig.tight_layout()

Visualize trajectory

In [74]:
# rendered trajectory videos of the energy minimization step
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad',
    'metadata.type': 'mp4_file',
}
In [75]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)  # rendered as cell output below
Out[75]:
12
In [76]:
# check files degenerate by 'metadata.type' and 'metadata.name'

aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique (type, name, step) triple
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
                'name': '$metadata.name',
                'step': '$metadata.step',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# flatten grouped keys ('$group' keys live under '_id') into each record
res = [ {**c['_id'], **c} for c in cursor]
columns = ['step', 'type', 'name', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]  # raw MongoDB group key; its components are separate columns now
In [77]:
# overview table of matching video files (rendered as cell output)
res_df
Out[77]:
step type name earliest latest object_count
0 GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad mp4_file NaN 2020-07-19 22:53:00.059128 2020-07-19 22:53:01.277341 12
In [78]:
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # newest first
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# wrap each mp4 in an ipywidgets Video; Video.from_file embeds the file's
# bytes into the widget, so the temp file can be removed right away
obj_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["gfs_id"])
    with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp:
        tmp.write(content)
        tmp.flush()  # Video.from_file re-opens the file by name
        obj_list.append(Video.from_file(tmp.name))
    os.unlink(tmp.name)  # previously leaked (delete=False, never removed)
    print('.', end='')
............
In [79]:
# show the last video widget (rendered as cell output)
obj_list[-1]

Pulling analysis

Overview on objects in step

In [80]:
# queries to the data base are simple dictionaries
# restrict to this project's pulling post-processing output
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsPull:ProcessAnalyzeAndVisualize:push_filepad'
}
In [81]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)  # rendered as cell output below
Out[81]:
276
In [82]:
# check files degenerate by 'metadata.type' and 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique (type, name) pair
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
                'name': '$metadata.name',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# flatten grouped keys ('$group' keys live under '_id') into each record
res = [ {**c['_id'], **c} for c in cursor]
columns = ['type', 'name', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]  # raw MongoDB group key; its components are separate columns now
In [83]:
# overview table of file types in the pulling step (rendered as cell output)
res_df
Out[83]:
type name earliest latest object_count
0 mp4_file NaN 2020-07-19 22:53:00.070575 2020-07-19 22:53:01.288914 12
1 surfactant_tail_rmsd NaN 2020-07-19 22:53:00.070573 2020-07-19 22:53:01.288912 12
2 surfactant_head_rmsd NaN 2020-07-19 22:53:00.070571 2020-07-19 22:53:01.288908 12
3 substrate_rmsd NaN 2020-07-19 22:53:00.070568 2020-07-19 22:53:01.288906 12
4 counterion_rmsd NaN 2020-07-19 22:53:00.070566 2020-07-19 22:53:01.288904 12
5 surfactant_tail_surfactant_tail_rdf NaN 2020-07-19 22:53:00.070564 2020-07-19 22:53:01.288901 12
6 surfactant_head_surfactant_tail_rdf NaN 2020-07-19 22:53:00.070561 2020-07-19 22:53:01.288899 12
7 surfactant_head_surfactant_head_rdf NaN 2020-07-19 22:53:00.070559 2020-07-19 22:53:01.288896 12
8 substrate_surfactant_tail_rdf NaN 2020-07-19 22:53:00.070557 2020-07-19 22:53:01.288894 12
9 substrate_surfactant_head_rdf NaN 2020-07-19 22:53:00.070554 2020-07-19 22:53:01.288891 12
10 substrate_substrate_rdf NaN 2020-07-19 22:53:00.070552 2020-07-19 22:53:01.288889 12
11 counterion_surfactant_tail_rdf NaN 2020-07-19 22:53:00.070550 2020-07-19 22:53:01.288883 12
12 counterion_surfactant_head_rdf NaN 2020-07-19 22:53:00.070547 2020-07-19 22:53:01.288880 12
13 counterion_substrate_rdf NaN 2020-07-19 22:53:00.070545 2020-07-19 22:53:01.288878 12
14 counterion_counterion_rdf NaN 2020-07-19 22:53:00.070543 2020-07-19 22:53:01.288876 12
15 topology_file NaN 2020-07-19 22:53:00.070541 2020-07-19 22:53:01.288873 12
16 pullx_file NaN 2020-07-19 22:53:00.070538 2020-07-19 22:53:01.288871 12
17 pullf_file NaN 2020-07-19 22:53:00.070536 2020-07-19 22:53:01.288869 12
18 data_file NaN 2020-07-19 22:53:00.070534 2020-07-19 22:53:01.288867 12
19 compressed_trajectory_file NaN 2020-07-19 22:53:00.070531 2020-07-19 22:53:01.288864 12
20 trajectory_file NaN 2020-07-19 22:53:00.070529 2020-07-19 22:53:01.288862 12
21 energy_file NaN 2020-07-19 22:53:00.070526 2020-07-19 22:53:01.288859 12
22 log_file NaN 2020-07-19 22:53:00.070520 2020-07-19 22:53:01.288853 12

Global observables

The gmx energy table:

  1  Restraint-Pot.   2  U-B              3  Proper-Dih.      4  LJ-14         
  5  Coulomb-14       6  LJ-(SR)          7  Coulomb-(SR)     8  Coul.-recip.  
  9  Position-Rest.  10  COM-Pull-En.    11  Potential       12  Kinetic-En.   
 13  Total-Energy    14  Temperature     15  Pressure        16  Constr.-rmsd  
 17  Vir-XX          18  Vir-XY          19  Vir-XZ          20  Vir-YX        
 21  Vir-YY          22  Vir-YZ          23  Vir-ZX          24  Vir-ZY        
 25  Vir-ZZ          26  Pres-XX         27  Pres-XY         28  Pres-XZ       
 29  Pres-YX         30  Pres-YY         31  Pres-YZ         32  Pres-ZX       
 33  Pres-ZY         34  Pres-ZZ         35  #Surf*SurfTen   36  T-rest

converted to dict with regex

 \s+([0-9]+)\s+([^\s]+)

and replacement

 '$2': $1,\n
In [84]:
# Mapping from energy-term name to its 1-based selection index at the
# interactive `gmx energy` prompt (see the table in the markdown above).
_gmx_energy_terms = [
    'Restraint-Pot.', 'U-B', 'Proper-Dih.', 'LJ-14', 'Coulomb-14',
    'LJ-(SR)', 'Coulomb-(SR)', 'Coul.-recip.', 'Position-Rest.',
    'COM-Pull-En.', 'Potential', 'Kinetic-En.', 'Total-Energy',
    'Temperature', 'Pressure', 'Constr.-rmsd',
    'Vir-XX', 'Vir-XY', 'Vir-XZ',
    'Vir-YX', 'Vir-YY', 'Vir-YZ',
    'Vir-ZX', 'Vir-ZY', 'Vir-ZZ',
    'Pres-XX', 'Pres-XY', 'Pres-XZ',
    'Pres-YX', 'Pres-YY', 'Pres-YZ',
    'Pres-ZX', 'Pres-ZY', 'Pres-ZZ',
    '#Surf*SurfTen', 'T-rest',
]
gmx_energy_dict = {name: num for num, name in enumerate(_gmx_energy_terms, start=1)}
In [85]:
# energy (.edr) files of the pulling post-processing step
query = { 
    "metadata.project": project_id,
    "metadata.type":    'energy_file',
    "metadata.step": "GromacsPull:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)  # rendered as cell output below
Out[85]:
12
In [86]:
# maps desired dataframe column name -> metadata field holding the parameter value
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [87]:
# subset of gmx energy terms to extract for the pulling run
# (keys into gmx_energy_dict above)
gmx_energy_selection = [
    'Restraint-Pot.',
    'Position-Rest.',
    'COM-Pull-En.',
    'Potential',
    'Kinetic-En.',
    'Total-Energy',
    'Temperature',
    'Pressure',
    'Constr.-rmsd',
]
In [88]:
res_list = []
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
aggregation_pipeline = [ match_aggregation, sort_aggregation, group_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

res_df_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])

    # panedr.edr_to_df on the raw stream fails, hence a named temp file;
    # leaving the `with` block closes (and thereby flushes) it for gmx to read
    tmpin = tempfile.NamedTemporaryFile(mode='w+b', suffix='.edr', delete=False)
    with tmpin:
        tmpin.write(content)

    res_df = None
    for sel in gmx_energy_selection:
        tmpout = tempfile.NamedTemporaryFile(suffix='.xvg', delete=False)
        try:
            res = gromacs.energy(f=tmpin.name, o=tmpout.name,
                                 input=str(gmx_energy_dict[sel]))
            xvg = mda.auxiliary.XVG.XVGReader(tmpout.name)
            xvg_time = xvg.read_all_times()
            # 1st entry of each frame is the time, strip it
            xvg_data = np.array([f.data[1:] for f in xvg]).flatten()
        except Exception:  # was a bare `except:`, which also swallowed KeyboardInterrupt
            logger.warning("Failed to read '{:s}' from data set {:d}.".format(sel, i))
            failed_list.append((nmolecules, sel))
        else:
            r = {'nmolecules': [nmolecules]*len(xvg_time), 'time': xvg_time, sel: xvg_data}
            cur_df = pd.DataFrame(r)
            if res_df is None:
                res_df = cur_df
            else:
                res_df = pd.merge(res_df, cur_df, how='outer', on=['nmolecules', 'time'])
        finally:
            # previously the .xvg temp file leaked whenever reading failed
            if os.path.exists(tmpout.name):
                os.unlink(tmpout.name)
    res_df_list.append(res_df)
    os.unlink(tmpin.name)
    print('.', end='')
print('')
res_df = pd.concat(res_df_list)
res_df_mi = res_df.set_index(['nmolecules','time'])
............
In [89]:
# multi-indexed (nmolecules, time) pulling observables (rendered as cell output)
res_df_mi
Out[89]:
Restraint-Pot. Position-Rest. COM-Pull-En. Potential Kinetic-En. Total-Energy Temperature Pressure Constr.-rmsd
nmolecules time
88 0.00 1493.480591 0.000000 0.000531 -597754.6875 10.885776 -597743.8125 0.338266 2993.943848 0.000002
0.02 742.482788 0.014380 1.347937 -598352.1875 602.971252 -597749.1875 18.736797 2979.817139 0.000002
0.04 211.578461 0.258170 9.330186 -598732.2500 983.296387 -597748.9375 30.555065 2963.199219 0.000002
0.06 86.002968 1.406035 19.936806 -598830.5000 1084.727295 -597745.7500 33.706940 2956.507812 0.000002
0.08 76.946274 3.596749 29.678221 -598815.8125 1074.096924 -597741.6875 33.376610 2953.386230 0.000002
0.10 84.384720 8.241014 41.989948 -598852.6250 1111.978271 -597740.6250 34.553741 2950.852539 0.000002
0.12 92.128899 15.452235 50.172047 -598762.7500 1027.623535 -597735.1250 31.932491 2938.900391 0.000002
0.14 77.307365 23.783371 52.395569 -598822.8750 1092.996582 -597729.8750 33.963902 2936.008789 0.000002
0.16 60.879116 35.931637 53.148029 -598735.3125 1012.030884 -597723.3125 31.447964 2944.878906 0.000002
0.18 60.419201 52.029709 55.594635 -598739.5625 1020.534302 -597719.0000 31.712200 2952.198730 0.000002
0.20 71.150993 69.232933 62.492477 -598731.8750 1018.525208 -597713.3750 31.649769 2944.013672 0.000002
44 0.00 328.474915 0.000000 0.000214 -586198.1875 3.603998 -586194.5625 0.224069 2965.282227 0.000002
0.02 154.236404 0.004559 0.571383 -586346.1250 150.160233 -586195.9375 9.335811 2963.894531 0.000002
0.04 37.618614 0.057044 3.940316 -586392.9375 197.380096 -586195.5625 12.271580 2963.171387 0.000002
0.06 13.926350 0.259215 8.321849 -586382.4375 187.748901 -586194.6875 11.672786 2964.159180 0.000002
0.08 14.375679 0.799992 12.601824 -586406.8125 213.917740 -586192.8750 13.299763 2964.248535 0.000002
0.10 31.645020 1.935078 17.939360 -586422.4375 230.363159 -586192.0625 14.322213 2961.777832 0.000002
0.12 38.431602 3.885602 21.493801 -586418.0000 227.783508 -586190.1875 14.161830 2955.664062 0.000002
0.14 22.140791 6.880564 22.757519 -586445.0000 257.068878 -586187.9375 15.982570 2956.925537 0.000002
0.16 10.551405 11.193547 23.809597 -586437.0625 251.889709 -586185.1875 15.660569 2964.223389 0.000002
0.18 4.676281 17.088915 26.599503 -586471.3125 288.551575 -586182.7500 17.939922 2969.114746 0.000002
0.20 3.778655 24.678282 31.761971 -586463.1875 283.047913 -586180.1250 17.597746 2964.830811 0.000002
438 0.00 7758.632324 0.000000 0.003609 -728824.7500 75.321106 -728749.4375 0.470099 2898.045410 0.000002
0.02 3483.291748 0.044684 6.999135 -732201.1250 3414.105957 -728787.0000 21.308352 2876.061279 0.000002
0.04 904.910156 1.917580 49.792530 -733586.1875 4796.172363 -728790.0000 29.934200 2856.181152 0.000002
0.06 431.056061 10.018900 107.124191 -733629.4375 4850.733398 -728778.6875 30.274731 2858.325195 0.000002
0.08 463.053925 24.228683 159.585251 -733499.8125 4745.547852 -728754.2500 29.618238 2840.774902 0.000002
0.10 722.669312 42.863976 216.961929 -733307.8125 4561.733398 -728746.0625 28.471003 2764.229492 0.000002
0.12 728.341125 64.390976 246.327728 -733277.0000 4542.275879 -728734.7500 28.349564 2718.754395 0.000002
0.14 504.542664 88.204720 249.874741 -733450.3750 4737.693359 -728712.6875 29.569216 2773.185547 0.000002
0.16 398.171021 113.320412 250.525116 -733193.1250 4508.420410 -728684.6875 28.138262 2852.901367 0.000002
0.18 451.732758 139.405441 260.177277 -733258.3125 4593.628906 -728664.6875 28.670073 2891.368652 0.000002
0.20 578.219666 165.859543 293.536987 -733307.0000 4659.278320 -728647.7500 29.079807 2841.210693 0.000002
394 0.00 11153.032227 0.000000 0.002766 -701380.5625 86.993607 -701293.5625 0.603590 3101.000977 0.000002
0.02 5175.661133 0.029903 9.719401 -705958.7500 4617.014648 -701341.7500 32.034340 3004.514648 0.000002
0.04 1388.958374 1.713053 70.931076 -708225.8125 6887.353027 -701338.4375 47.786682 2892.891113 0.000002
0.06 580.233398 10.463760 150.519775 -708553.9375 7226.486328 -701327.4375 50.139698 2850.353516 0.000002
0.08 497.414459 27.786430 221.173691 -708173.5000 6874.951172 -701298.5625 47.700634 2832.396484 0.000002
0.10 721.945679 56.056725 308.627136 -707836.1250 6543.200195 -701292.9375 45.398838 2773.343750 0.000002
0.12 795.619202 95.645821 360.062439 -707478.8750 6203.111328 -701275.7500 43.039192 2732.782227 0.000002
0.14 645.407593 141.712265 367.164978 -707640.2500 6389.089844 -701251.1875 44.329571 2778.604004 0.000002
0.16 536.192871 190.179138 370.243866 -707199.5000 5982.742188 -701216.7500 41.510197 2850.090332 0.000002
0.18 598.552185 240.939117 375.614685 -707394.4375 6193.708008 -701200.7500 42.973946 2873.233887 0.000002
0.20 676.540894 286.954498 393.765961 -707266.6875 6090.129883 -701176.5625 42.255287 2819.694336 0.000002
350 0.00 7545.695801 0.000000 0.002528 -689301.0000 67.677567 -689233.3125 0.528606 3023.489990 0.000002
0.02 3466.935547 0.022239 8.092226 -692411.8750 3147.667480 -689264.1875 24.585342 2953.315430 0.000002
0.04 1004.460327 0.674029 57.397018 -693776.4375 4510.098633 -689266.3125 35.226818 2882.658936 0.000002
0.06 508.296936 3.862727 121.058868 -693851.9375 4598.857422 -689253.0625 35.920086 2857.650391 0.000002
0.08 447.073303 11.796712 178.167221 -693697.7500 4465.446289 -689232.3125 34.878059 2847.791992 0.000002
0.10 540.257874 26.627554 246.417404 -693660.0625 4435.601562 -689224.4375 34.644951 2816.611084 0.000002
0.12 537.844849 46.440964 286.874969 -693448.0625 4239.412109 -689208.6250 33.112583 2782.551758 0.000002
0.14 454.933167 68.224663 294.416962 -693607.2500 4419.962891 -689187.3125 34.522804 2807.801270 0.000002
0.16 353.368195 91.454712 291.752136 -693381.8750 4223.267578 -689158.6250 32.986485 2866.880859 0.000002
0.18 395.314941 115.658142 291.417450 -693576.0000 4434.130859 -689141.8750 34.633469 2890.599121 0.000002
0.20 464.477203 140.660080 308.393066 -693479.4375 4354.107910 -689125.3125 34.008430 2845.946289 0.000002
263 0.00 6698.218262 0.000000 0.001731 -647402.8125 55.522999 -647347.3125 0.577147 3038.223633 0.000002
0.02 3136.052734 0.025132 5.561255 -650155.5625 2779.114258 -647376.4375 28.888174 2979.119141 0.000002
0.04 796.402527 1.414109 41.152927 -651652.5000 4278.487305 -647374.0000 44.473770 2903.262207 0.000002
0.06 316.174133 8.055161 91.455330 -651770.9375 4404.885742 -647366.0625 45.787651 2872.420898 0.000002
0.08 301.948639 21.323339 136.764023 -651591.8750 4242.446777 -647349.4375 44.099140 2861.912842 0.000002
0.10 467.585907 42.046680 186.849228 -651442.3750 4099.144531 -647343.2500 42.609550 2830.160156 0.000002
0.12 567.959534 70.121613 215.958389 -651228.3750 3899.988770 -647328.3750 40.539375 2804.322021 0.000002
0.14 427.187744 101.832092 218.040955 -651443.1875 4132.283203 -647310.8750 42.954018 2830.333496 0.000002
0.16 260.240265 133.691437 214.978592 -651305.6250 4013.472656 -647292.1250 41.719013 2876.405029 0.000002
0.18 234.081543 164.680359 218.439667 -651482.7500 4204.058594 -647278.6875 43.700104 2905.873047 0.000002
0.20 323.221283 192.069763 237.972260 -651352.8125 4090.639893 -647262.1875 42.521145 2882.826904 0.000002
525 0.00 8607.337891 0.000000 0.003872 -767427.5000 103.417938 -767324.0625 0.538490 2912.214355 0.000002
0.02 3682.100586 0.030841 10.701400 -771187.8750 3813.049072 -767374.8125 19.854294 2874.255859 0.000002
0.04 980.268738 1.021871 72.757286 -772305.9375 4922.262207 -767383.6875 25.629894 2852.163574 0.000002
0.06 463.856201 6.101664 148.666412 -772300.0625 4935.623047 -767364.4375 25.699463 2864.441406 0.000002
0.08 469.606689 16.675367 218.432358 -772090.1250 4760.152344 -767330.0000 24.785797 2871.559082 0.000002
0.10 750.125427 32.825645 305.430908 -771894.1875 4572.471680 -767321.6875 23.808556 2792.189209 0.000002
0.12 735.697388 53.509453 349.977936 -771890.5625 4578.110352 -767312.4375 23.837917 2749.816895 0.000002
0.14 573.998840 76.868851 363.524170 -772041.5000 4755.625977 -767285.8750 24.762230 2803.604492 0.000002
0.16 470.507111 101.520493 371.835419 -771694.0625 4449.618652 -767244.4375 23.168873 2889.657715 0.000002
0.18 438.253418 128.023483 375.800018 -772003.6875 4781.264648 -767222.4375 24.895729 2930.999756 0.000002
0.20 576.049805 154.629288 405.836670 -771999.1250 4798.699219 -767200.4375 24.986509 2862.979980 0.000002
175 0.00 3874.121826 0.000000 0.000969 -621884.6875 29.880894 -621854.8125 0.466824 2987.429199 0.000002
0.02 1937.193237 0.013868 2.920847 -623383.8125 1511.335205 -621872.5000 23.611334 2959.416016 0.000002
0.04 557.916260 0.388364 21.011381 -624241.6875 2370.242676 -621871.4375 37.029900 2928.752441 0.000002
0.06 231.725311 2.498876 45.730106 -624412.6875 2547.578125 -621865.1250 39.800381 2916.041748 0.000002
0.08 203.605927 7.247248 69.622650 -624304.5625 2449.355469 -621855.1875 38.265865 2909.197754 0.000002
0.10 233.991562 15.433475 99.979263 -624341.6250 2489.035400 -621852.5625 38.885780 2892.194092 0.000002
0.12 259.917816 28.493690 120.148582 -624227.3125 2382.177734 -621845.1250 37.216358 2872.083740 0.000002
0.14 204.794815 46.641544 125.648056 -624273.3750 2438.454102 -621834.9375 38.095554 2885.962646 0.000002
0.16 174.934906 66.930740 128.548691 -624090.9375 2272.121582 -621818.8125 35.496971 2913.712402 0.000002
0.18 189.451401 85.810219 132.458099 -624162.0625 2348.172607 -621813.8750 36.685104 2924.983154 0.000002
0.20 224.017120 103.386101 143.336105 -624091.6250 2287.826660 -621803.8125 35.742329 2905.412842 0.000002
481 0.00 9428.630859 0.000000 0.003602 -743249.6875 88.234375 -743161.4375 0.501460 2967.549805 0.000002
0.02 4282.659668 0.035145 8.206218 -747143.0625 3936.248779 -743206.8125 22.370794 2917.894531 0.000002
0.04 1206.039062 1.942907 57.165253 -748813.2500 5602.726562 -743210.5000 31.841852 2873.593506 0.000002
0.06 518.847229 10.420077 122.135185 -748911.7500 5713.704102 -743198.0625 32.472565 2859.785156 0.000002
0.08 499.082703 27.824352 182.521652 -748706.8750 5538.993164 -743167.8750 31.479633 2834.067383 0.000002
0.10 703.608521 56.923763 254.324661 -748625.1250 5464.617188 -743160.5000 31.056938 2769.378906 0.000002
0.12 702.315430 93.865196 298.449799 -748276.5625 5129.429688 -743147.1250 29.151974 2733.182373 0.000002
0.14 544.695007 132.905029 314.746765 -748412.3125 5289.708984 -743122.6250 30.062885 2777.386475 0.000002
0.16 415.489777 169.417603 322.989532 -748018.1250 4933.263672 -743084.8750 28.037109 2848.407715 0.000002
0.18 460.445129 201.549957 331.189545 -748261.8125 5193.457520 -743068.3750 29.515862 2884.031250 0.000002
0.20 654.798218 232.310684 363.722015 -748198.9375 5148.762695 -743050.1875 29.261848 2832.586670 0.000002
306 0.00 7031.586914 0.000000 0.001686 -671124.8750 58.651764 -671066.2500 0.523988 3026.758545 0.000002
0.02 3242.020264 0.040133 6.152946 -674103.5000 3007.474609 -671096.0000 26.868418 2966.178467 0.000002
0.04 882.035706 1.412255 43.335892 -675576.5625 4480.565918 -671096.0000 40.028839 2892.748535 0.000002
0.06 406.461609 7.572046 89.815346 -675827.7500 4741.573242 -671086.1875 42.360645 2866.604980 0.000002
0.08 316.891846 19.850439 132.287506 -675710.5625 4641.774414 -671068.8125 41.469051 2855.712891 0.000002
0.10 443.524994 38.274605 184.882965 -675507.8750 4445.791992 -671062.0625 39.718174 2816.819824 0.000002
0.12 562.236145 55.974861 214.112564 -675287.1250 4238.885742 -671048.2500 37.869698 2769.663086 0.000002
0.14 506.160767 78.852089 218.645889 -675450.5000 4422.254883 -671028.2500 39.507896 2790.758789 0.000002
0.16 365.383362 105.988388 222.624802 -675234.9375 4231.237305 -671003.6875 37.801369 2841.823242 0.000002
0.18 371.525513 134.626602 234.836151 -675306.4375 4315.522461 -670990.9375 38.554359 2874.704102 0.000002
0.20 399.296844 163.539642 263.656708 -675284.5000 4311.053711 -670973.4375 38.514439 2849.378662 0.000002
219 0.00 6632.435059 0.000000 0.001199 -637024.9375 47.841904 -636977.1250 0.597235 3091.846191 0.000002
0.02 3121.731689 0.016906 6.397262 -639693.3125 2691.462158 -637001.8750 33.598892 3010.247559 0.000002
0.04 869.231262 0.902822 47.208561 -641100.4375 4098.412109 -637002.0000 51.162563 2918.035156 0.000002
0.06 397.136932 6.618818 97.188881 -641276.8750 4284.237793 -636992.6250 53.482319 2877.015137 0.000002
0.08 351.918945 19.863176 134.528717 -641112.8750 4136.791016 -636976.0625 51.641666 2872.137207 0.000002
0.10 390.079102 40.681301 180.328781 -641108.6250 4137.142090 -636971.5000 51.646049 2859.713623 0.000002
0.12 368.125366 66.592979 208.421463 -640976.0000 4015.029297 -636961.0000 50.121651 2833.619873 0.000002
0.14 296.856110 96.319504 208.719437 -641049.1875 4104.638184 -636944.5625 51.240284 2840.595215 0.000002
0.16 270.976379 130.172577 204.839157 -640627.6250 3703.714355 -636923.9375 46.235348 2862.900879 0.000002
0.18 251.967087 169.544434 209.414276 -640724.1875 3808.379395 -636915.8125 47.541935 2875.118896 0.000002
0.20 273.513489 210.551559 227.073532 -640575.3125 3676.054443 -636899.2500 45.890057 2845.762207 0.000002
131 0.00 5439.272461 0.000000 0.000994 -605089.6250 45.180439 -605044.4375 0.942986 3058.089844 0.000003
0.02 2392.614502 0.033704 5.545445 -607416.2500 2352.045166 -605064.1875 49.090851 3005.666016 0.000002
0.04 590.289368 2.455495 41.046505 -608709.0000 3643.850098 -605065.1250 76.052834 2927.380859 0.000002
0.06 318.442780 9.784841 78.639977 -608904.2500 3841.742432 -605062.5000 80.183159 2890.686279 0.000002
0.08 248.531052 20.016790 98.876549 -608758.8750 3708.537354 -605050.3125 77.402962 2891.798584 0.000003
0.10 242.611725 31.480095 125.397232 -608651.8125 3604.449219 -605047.3750 75.230476 2885.085938 0.000002
0.12 343.100250 42.242630 142.602478 -608583.5000 3541.214844 -605042.3125 73.910675 2866.785889 0.000002
0.14 403.812500 53.514503 139.782608 -608453.6250 3424.105225 -605029.5000 71.466415 2869.413086 0.000002
0.16 342.609131 68.988907 138.817230 -608358.6250 3340.652832 -605018.0000 69.724632 2898.038086 0.000002
0.18 307.776978 90.317818 140.629562 -608259.4375 3247.385254 -605012.0625 67.777992 2918.379395 0.000002
0.20 402.171112 115.269615 146.295227 -608157.8750 3154.024414 -605003.8750 65.829407 2902.051270 0.000002
In [90]:
cols = 2
# pulling observables to plot, one subplot each
y_quantities = [
    'Restraint-Pot.',
    'Position-Rest.',
    'COM-Pull-En.',
    'Potential',
    'Kinetic-En.',
    'Total-Energy',
    'Temperature',
    'Pressure',
    'Constr.-rmsd',
    ]
n = len(y_quantities)
# ceil division: `round(n/cols)` gave 4 rows for the 9 quantities here
# (banker's rounding of 4.5), silently dropping the last quantity from the grid
rows = -(-n // cols)
positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
# scalar group key (not a one-element list) so `key` is directly usable as label
for key, grp in res_df.groupby('nmolecules'):
    for y_quantity, position in zip(y_quantities, positions):
        grp.plot('time',y_quantity,ax=ax[position],label=key,title=y_quantity)

fig.tight_layout()

Pulling forces

In [91]:
res_df_list = []
failed_list = []

# pulling force (.xvg) files of the pulling post-processing step
query = { 
    "metadata.project": project_id,
    "metadata.step": "GromacsPull:ProcessAnalyzeAndVisualize:push_filepad",
    "metadata.type": 'pullf_file',
}

fp.filepad.count_documents(query)
match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
aggregation_pipeline = [ match_aggregation, sort_aggregation, group_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])

    tmpin = tempfile.NamedTemporaryFile(mode='w+b', suffix='.xvg', delete=False)
    with tmpin:  # leaving the block closes (and thereby flushes) the file
        tmpin.write(content)

    try:
        xvg = mda.auxiliary.XVG.XVGReader(tmpin.name)
        xvg_time = xvg.read_all_times()
        # 1st entry of each frame is the time, strip it; one column per pull coordinate
        xvg_data = np.array([f.data[1:] for f in xvg])
    except Exception:  # was a bare `except:`, which also swallowed KeyboardInterrupt
        logger.warning("Failed to read data set {:d}.".format(i))
        failed_list.append(nmolecules)
    else:
        res_df_list.append(pd.DataFrame({
            'nmolecules': np.array([nmolecules]*len(xvg_time), dtype=int),
            'time': xvg_time, 
            # `j` instead of `i` to avoid shadowing the enumerate counter
            **{j: xvg_data[:,j] for j in range(nmolecules)}
        }))
    os.unlink(tmpin.name)
    print('.', end='')
print('')
res_df = pd.concat(res_df_list)
res_df_mi = res_df.set_index(['nmolecules','time'])
............
In [92]:
# pulling forces: one subplot per parameter set (nmolecules)
n = len(res_df['nmolecules'].unique())
cols = 2 if n > 1 else 1
# ceil division: `round(n/cols)` underallocates for odd group counts
# (banker's rounding, e.g. round(2.5) == 2), silently dropping the last group
rows = -(-n // cols)
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]

fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]  # single-axes case: make indexable like the multi-axes array
# scalar group key (not a one-element list) so `key` is directly usable as title
for pos, (key, grp) in zip(positions, res_df.groupby('nmolecules')):
    columns = list(set(grp.columns) - set(['nmolecules','time']))
    grp.plot('time', columns, ax=ax[pos],title=key,legend=None)
fig.tight_layout()
In [93]:
# mean pulling force per parameter set, all sets in one axes
fig, ax = plt.subplots(1,1,figsize=(5,4))
# scalar group key so `key` works directly as legend label; the unused
# `columns` local of the original cell has been dropped
for key, grp in res_df.groupby('nmolecules'):
    grp = grp.set_index('time')
    grp = grp.drop(columns='nmolecules')
    grp.mean(axis=1).plot(legend=True, label=key, ax=ax)  # mean over pull coordinates
fig.tight_layout()
#fig.legend()

Pulling groups movement

In [94]:
res_df_list = []
failed_list = []

# pullx files: pull-group positions / displacements, one set of columns per pull coordinate
query = { 
    "metadata.project": project_id,
    "metadata.step": "GromacsPull:ProcessAnalyzeAndVisualize:push_filepad",
    "metadata.type":    'pullx_file',
}

fp.filepad.count_documents(query)
match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
aggregation_pipeline = [ match_aggregation, sort_aggregation, group_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)


for i, c in enumerate(cursor):
    print(c["_id"])
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])

    tmpin = tempfile.NamedTemporaryFile(mode='w+b', suffix='.xvg', delete=False)
    with tmpin:
        tmpin.write(content)

    try:
        xvg = gromacs.fileformats.XVG(tmpin.name)
        xvg_time = xvg.array[0, :]

        # Column layout: row 0 is time, then N_cols_per_coord rows per pull
        # coordinate (e.g. '1', '1 ref', '1 dX', ... for coordinate 1).
        N_pull_coords = nmolecules
        N_cols = len(xvg.names)
        N_cols_per_coord = int(N_cols / N_pull_coords)

        xvg_labels = xvg.names[:N_cols_per_coord]
        xvg_data = {}
        for j in range(N_pull_coords):
            for k in range(N_cols_per_coord):
                xvg_data[(j, xvg_labels[k])] = xvg.array[
                    1 + j*N_cols_per_coord + k, :]

    except Exception:  # was a bare 'except:' — don't swallow KeyboardInterrupt/SystemExit
        logger.exception("Failed to read data set {:d}.".format(i))
        failed_list.append(nmolecules)

    else:
        # keys of xvg_data become (pull coordinate index, quantity label) tuple columns
        res_df_list.append(pd.DataFrame({
            'nmolecules': np.array([nmolecules]*len(xvg_time), dtype=int),
            'time': xvg_time,
            **xvg_data
        }))
    os.unlink(tmpin.name)  # clean up temp file in either case
res_df = pd.concat(res_df_list)
res_df_mi = res_df.set_index(['nmolecules', 'time'])
res_df_mi.columns = pd.MultiIndex.from_tuples(res_df_mi.columns, names=['nmolecule', 'coord'])
{'nmolecules': 481}
{'nmolecules': 438}
{'nmolecules': 394}
{'nmolecules': 350}
{'nmolecules': 263}
{'nmolecules': 219}
{'nmolecules': 175}
{'nmolecules': 131}
{'nmolecules': 306}
{'nmolecules': 44}
{'nmolecules': 525}
{'nmolecules': 88}
In [95]:
res_df_mi
Out[95]:
nmolecule 0 ... 524
coord 1 1 ref 1 dX 1 dY 1 dZ 1 g 1 X 1 g 1 Y 1 g 1 Z ... 1 dY 1 dZ 1 g 1 X 1 g 1 Y 1 g 1 Z 1 g 2 X 1 g 2 Y 1 g 2 Z
nmolecules time
481 0.0 3.02906 3.02901 2.669190 0.972790 -1.050880 6.799 6.864 6.719 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 3.03794 3.04901 2.673090 0.983246 -1.056830 6.799 6.864 6.719 ... NaN NaN NaN NaN NaN NaN NaN NaN
438 0.0 3.06981 3.06970 -1.695070 -0.217678 2.550120 6.789 6.821 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 3.05914 3.08970 -1.693620 -0.208617 2.538990 6.789 6.821 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
394 0.0 3.05858 3.05857 1.789350 2.047640 1.400090 6.749 6.708 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 3.05402 3.07857 1.785950 2.048270 1.393560 6.749 6.708 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
350 0.0 2.91510 2.91528 1.507870 -1.258800 -2.153960 6.720 6.803 6.789 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 2.91157 2.93528 1.523610 -1.230070 -2.154710 6.720 6.803 6.789 ... NaN NaN NaN NaN NaN NaN NaN NaN
263 0.0 3.09102 3.09096 1.908980 -1.902280 1.513770 6.719 6.928 6.938 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 3.11048 3.11096 1.924900 -1.903870 1.531380 6.719 6.928 6.938 ... NaN NaN NaN NaN NaN NaN NaN NaN
219 0.0 3.08224 3.08202 -1.097430 -2.056400 2.016690 6.783 6.773 6.687 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 3.09812 3.10202 -1.135390 -2.050320 2.026190 6.783 6.773 6.687 ... NaN NaN NaN NaN NaN NaN NaN NaN
175 0.0 2.98740 2.98736 -1.122020 2.768040 -0.059902 6.715 6.790 6.825 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 2.98779 3.00736 -1.125900 2.766770 -0.064975 6.715 6.790 6.825 ... NaN NaN NaN NaN NaN NaN NaN NaN
131 0.0 3.09045 3.09024 -0.850902 -1.544340 2.538080 6.785 6.624 6.675 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 3.08927 3.11024 -0.843101 -1.545660 2.538450 6.785 6.624 6.675 ... NaN NaN NaN NaN NaN NaN NaN NaN
306 0.0 2.99688 2.99656 2.234560 -1.989880 0.168659 6.785 6.759 6.790 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 2.98128 3.01656 2.212180 -1.992180 0.159765 6.785 6.759 6.790 ... NaN NaN NaN NaN NaN NaN NaN NaN
44 0.0 2.87939 2.87932 2.060960 1.771370 -0.951617 6.660 6.627 6.730 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 2.88188 2.89932 2.055500 1.778930 -0.956850 6.660 6.627 6.730 ... NaN NaN NaN NaN NaN NaN NaN NaN
525 0.0 3.04277 3.04264 -3.012120 -0.408986 0.135390 6.778 6.802 6.763 ... -0.989800 2.01098 6.778 6.802 6.763 4.71159 5.8122 8.77398
0.2 3.05486 3.06264 -3.021620 -0.423459 0.150595 6.778 6.802 6.763 ... -0.956393 2.01185 6.778 6.802 6.763 4.71620 5.8456 8.77485
88 0.0 2.96425 2.96426 2.212950 0.236086 -1.958030 6.624 6.903 6.676 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 2.92075 2.98426 2.210040 0.260372 -1.891750 6.624 6.903 6.676 ... NaN NaN NaN NaN NaN NaN NaN NaN

24 rows × 5775 columns

In [96]:
# Average same-'coord'-label columns across all pull groups into one column each.
# NOTE(review): DataFrame.groupby(axis=1) is deprecated in pandas >= 2.1 — consider
# res_df_mi.T.groupby(level='coord').mean().T instead; confirm column order is preserved.
res_df = res_df_mi.groupby(axis=1,level='coord').mean().reset_index()
In [97]:
res_df
Out[97]:
coord nmolecules time 1 1 dX 1 dY 1 dZ 1 g 1 X 1 g 1 Y 1 g 1 Z 1 g 2 X 1 g 2 Y 1 g 2 Z 1 ref
0 481 0.0 3.034603 -0.014782 -0.072324 0.067737 6.799 6.864 6.719 6.784223 6.791673 6.786737 3.034543
1 481 0.2 3.025667 -0.013379 -0.071653 0.067348 6.799 6.864 6.719 6.785626 6.792344 6.786349 3.054543
2 438 0.0 3.034992 -0.005472 -0.046267 0.093900 6.789 6.821 6.695 6.783532 6.774730 6.788901 3.034932
3 438 0.2 3.026447 -0.006146 -0.046641 0.094356 6.789 6.821 6.695 6.782858 6.774356 6.789357 3.054932
4 394 0.0 3.039270 0.042538 0.071386 0.102983 6.749 6.708 6.695 6.791542 6.779382 6.797985 3.039219
5 394 0.2 3.025625 0.040787 0.071826 0.102654 6.749 6.708 6.695 6.789791 6.779823 6.797655 3.059219
6 350 0.0 3.036760 0.080290 -0.023688 0.004388 6.720 6.803 6.789 6.800295 6.779309 6.793390 3.036711
7 350 0.2 3.023129 0.081602 -0.021021 0.004253 6.720 6.803 6.789 6.801606 6.781976 6.793254 3.056711
8 263 0.0 3.037367 0.090980 -0.167417 -0.159162 6.719 6.928 6.938 6.809985 6.760580 6.778839 3.037313
9 263 0.2 3.023441 0.092386 -0.165892 -0.159699 6.719 6.928 6.938 6.811390 6.762104 6.778302 3.057313
10 219 0.0 3.035669 -0.001536 0.017633 0.134667 6.783 6.773 6.687 6.781467 6.790630 6.821668 3.035626
11 219 0.2 3.019158 -0.002352 0.018440 0.133283 6.783 6.773 6.687 6.780652 6.791437 6.820284 3.055626
12 175 0.0 3.030009 0.080984 -0.018843 -0.056047 6.715 6.790 6.825 6.795988 6.771153 6.768955 3.029957
13 175 0.2 3.018155 0.078428 -0.017950 -0.058704 6.715 6.790 6.825 6.793433 6.772047 6.766297 3.049957
14 131 0.0 3.037290 0.008238 0.222722 0.160375 6.785 6.624 6.675 6.793242 6.846719 6.835376 3.037226
15 131 0.2 3.020346 0.011488 0.221172 0.159499 6.785 6.624 6.675 6.796492 6.845169 6.834499 3.057226
16 306 0.0 3.030811 -0.003623 0.015265 0.000827 6.785 6.759 6.790 6.781381 6.774262 6.790828 3.030759
17 306 0.2 3.016804 -0.005029 0.012748 -0.000155 6.785 6.759 6.790 6.779975 6.771745 6.789846 3.050759
18 44 0.0 3.020985 0.235488 0.313048 0.051773 6.660 6.627 6.730 6.895492 6.940044 6.781774 3.020933
19 44 0.2 3.008821 0.236887 0.314483 0.049420 6.660 6.627 6.730 6.896890 6.941478 6.779421 3.040932
20 525 0.0 3.034922 0.010765 -0.024874 0.038641 6.778 6.802 6.763 6.788769 6.777123 6.801643 3.034867
21 525 0.2 3.025270 0.009207 -0.024542 0.039873 6.778 6.802 6.763 6.787212 6.777454 6.802874 3.054867
22 88 0.0 3.036086 0.236927 -0.256623 0.157635 6.624 6.903 6.676 6.860931 6.646373 6.833636 3.036024
23 88 0.2 3.022179 0.235431 -0.257090 0.160130 6.624 6.903 6.676 6.859435 6.645907 6.836131 3.056024
In [98]:
cols = 2
# one panel per averaged pull-coordinate quantity
y_quantities = [
    '1', 
    '1 ref', 
    '1 dX', 
    '1 dY', 
    '1 dZ', 
    '1 g 1 X', 
    '1 g 1 Y', 
    '1 g 1 Z', 
    '1 g 2 X', 
    '1 g 2 Y', 
    '1 g 2 Z'
]
n = len(y_quantities)
rows = (n + cols - 1) // cols  # ceil division; round() can undersize the grid (banker's rounding)
positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
fig, ax = plt.subplots(rows, cols, figsize=(5*cols, 4*rows))
for key, grp in res_df.groupby(['nmolecules']):
    for y_quantity, position in zip(y_quantities, positions):
        grp.plot('time', y_quantity, ax=ax[position], label=key, title=y_quantity)

fig.tight_layout()

Rendered videos

In [99]:
# query: rendered pull-trajectory videos for this project
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsPull:ProcessAnalyzeAndVisualize:push_filepad',
    'metadata.type': 'mp4_file',
}
In [100]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[100]:
12
In [101]:
# check files degenerate by 'metadata.type' and 'metadata.name'

aggregation_pipeline = [
    {"$match": query},
    {   # one bucket per unique (type, name, step) triple
        "$group": {
            "_id": {
                'type': '$metadata.type',
                'name': '$metadata.name',
                'step': '$metadata.step',
            },
            "object_count": {"$sum": 1},  # data sets per bucket
            "earliest": {'$min': '$metadata.datetime'},
            "latest": {'$max': '$metadata.datetime'},
        },
    },
    {   # most recently started buckets first
        "$sort": {"earliest": pymongo.DESCENDING}
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# flatten each bucket's '_id' sub-document into the record itself
res = [{**doc['_id'], **doc} for doc in cursor]
res_df = pd.DataFrame(
    data=res,
    columns=['step', 'type', 'name', 'earliest', 'latest', 'object_count', '_id'])
del res_df["_id"]
In [102]:
res_df
Out[102]:
step type name earliest latest object_count
0 GromacsPull:ProcessAnalyzeAndVisualize:push_filepad mp4_file NaN 2020-07-19 22:53:00.070575 2020-07-19 22:53:01.288914 12
In [103]:
aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_list = []
for i, c in enumerate(cursor):
    content, metadata = fp.get_file_by_id(c["gfs_id"])
    with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp:
        tmp.write(content)
        tmp.flush()  # file is read by name below while still open; unflushed buffers would truncate it
        obj_list.append(Video.from_file(tmp.name))
    # Video.from_file loads the content into the widget, so the temp file can go
    # (previously leaked: delete=False with no unlink).
    os.unlink(tmp.name)
    print('.', end='')
............
In [104]:
# render all collected video widgets inline
for obj in obj_list:
    display(obj)

Pre-evaluated RDF

Overview

In [105]:
query = { 
    "metadata.project": project_id,
    # any type ending in 'rdf', e.g. 'substrate_surfactant_head_rdf'
    "metadata.type": {'$regex': '.*rdf$'},
    "metadata.step": "GromacsPull:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[105]:
120
In [106]:
# check files degenerate by 'metadata.type' and 'metadata.name'
aggregation_pipeline = [
    {"$match": query},
    {   # one bucket per unique type
        "$group": {
            "_id": {
                'type': '$metadata.type',
            },
            "object_count": {"$sum": 1},  # data sets per bucket
            "earliest": {'$min': '$metadata.datetime'},
            "latest": {'$max': '$metadata.datetime'},
        },
    },
    {   # most recently started buckets first
        "$sort": {"earliest": pymongo.DESCENDING}
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# flatten each bucket's '_id' sub-document into the record itself
res = [{**doc['_id'], **doc} for doc in cursor]
res_df = pd.DataFrame(
    data=res,
    columns=['type', 'earliest', 'latest', 'object_count', '_id'])
del res_df["_id"]
In [107]:
res_df
Out[107]:
type earliest latest object_count
0 surfactant_tail_surfactant_tail_rdf 2020-07-19 22:53:00.070564 2020-07-19 22:53:01.288901 12
1 surfactant_head_surfactant_tail_rdf 2020-07-19 22:53:00.070561 2020-07-19 22:53:01.288899 12
2 surfactant_head_surfactant_head_rdf 2020-07-19 22:53:00.070559 2020-07-19 22:53:01.288896 12
3 substrate_surfactant_tail_rdf 2020-07-19 22:53:00.070557 2020-07-19 22:53:01.288894 12
4 substrate_surfactant_head_rdf 2020-07-19 22:53:00.070554 2020-07-19 22:53:01.288891 12
5 substrate_substrate_rdf 2020-07-19 22:53:00.070552 2020-07-19 22:53:01.288889 12
6 counterion_surfactant_tail_rdf 2020-07-19 22:53:00.070550 2020-07-19 22:53:01.288883 12
7 counterion_surfactant_head_rdf 2020-07-19 22:53:00.070547 2020-07-19 22:53:01.288880 12
8 counterion_substrate_rdf 2020-07-19 22:53:00.070545 2020-07-19 22:53:01.288878 12
9 counterion_counterion_rdf 2020-07-19 22:53:00.070543 2020-07-19 22:53:01.288876 12

Substrate - surfactant head RDF

In [108]:
# aggregation '_id' field name -> metadata key holding the parameter value
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [109]:
# query: substrate - surfactant head RDF data sets of this project
query = { 
    "metadata.project": project_id,
    "metadata.type": 'substrate_surfactant_head_rdf',
    "metadata.step": "GromacsPull:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[109]:
12
In [110]:
res_dict = {}
failed_list = []

# pipeline: filter project files, newest first, keep latest file per parameter set
match_aggregation = {"$match": query}
sort_aggregation = {"$sort": {"metadata.datetime": pymongo.DESCENDING}}
group_aggregation = {
    "$group": {
        "_id": {field: '${}'.format(key) for field, key in parameter_dict.items()},
        "degeneracy": {"$sum": 1},  # number matching data sets
        "latest": {"$first": "$gfs_id"},  # unique gridfs id of newest file
    }
}
aggregation_pipeline = [match_aggregation, sort_aggregation, group_aggregation]
cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, c in enumerate(cursor):
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data = np.loadtxt(io.StringIO(content.decode()), comments='#')
    # first row: distance bins; remaining rows: one RDF per frame
    res_dict[nmolecules] = {'dist': data[0], 'rdf': data[1:]}
    print('.', end='')
print('')
............
In [111]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = (n + cols - 1) // cols  # ceil division; round() can undersize the grid
if rows > 1:
    positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]

fig, ax = plt.subplots(rows, cols, figsize=(5*cols, 4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]  # single panel: allow positional indexing
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'], data['rdf'][0], label='First frame RDF')
    # BUGFIX: was data['rdf'][len(data)//2] — len(data) is the dict length (always 2),
    # which plotted frame 1 instead of the middle frame of the trajectory.
    ax[pos].plot(data['dist'], data['rdf'][len(data['rdf'])//2], label='Intermediate frame RDF')
    ax[pos].plot(data['dist'], data['rdf'][-1], label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
fig.show()

Substrate - surfactant tail RDF

In [112]:
# aggregation '_id' field name -> metadata key holding the parameter value
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [113]:
# query: substrate - surfactant tail RDF data sets of this project
query = { 
    "metadata.project": project_id,
    "metadata.type": 'substrate_surfactant_tail_rdf',
    "metadata.step": "GromacsPull:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[113]:
12
In [114]:
res_dict = {}
failed_list = []

# pipeline: filter, newest first, keep latest file per parameter set,
# then order groups by descending molecule count
match_aggregation = {"$match": query}
sort_aggregation = {"$sort": {"metadata.datetime": pymongo.DESCENDING}}
group_aggregation = {
    "$group": {
        "_id": {field: '${}'.format(key) for field, key in parameter_dict.items()},
        "degeneracy": {"$sum": 1},  # number matching data sets
        "latest": {"$first": "$gfs_id"},  # unique gridfs id of newest file
    }
}
second_sort_aggregation = {"$sort": {"_id.nmolecules": pymongo.DESCENDING}}


aggregation_pipeline = [
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation]
cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, c in enumerate(cursor):
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data = np.loadtxt(io.StringIO(content.decode()), comments='#')
    # first row: distance bins; remaining rows: one RDF per frame
    res_dict[nmolecules] = {'dist': data[0], 'rdf': data[1:]}
    print('.', end='')
print('')
............
In [115]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = (n + cols - 1) // cols  # ceil division; round() can undersize the grid
if rows > 1:
    positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]

fig, ax = plt.subplots(rows, cols, figsize=(5*cols, 4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]  # single panel: allow positional indexing

for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'], data['rdf'][0], label='First frame RDF')
    # BUGFIX: was data['rdf'][len(data)//2] — len(data) is the dict length (always 2),
    # which plotted frame 1 instead of the middle frame of the trajectory.
    ax[pos].plot(data['dist'], data['rdf'][len(data['rdf'])//2], label='Intermediate frame RDF')
    ax[pos].plot(data['dist'], data['rdf'][-1], label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
fig.show()

Surfactant head - surfactant tail RDF

In [116]:
# aggregation '_id' field name -> metadata key holding the parameter value
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [117]:
# query: surfactant head - surfactant tail RDF data sets of this project
query = { 
    "metadata.project": project_id,
    "metadata.type": 'surfactant_head_surfactant_tail_rdf',
    "metadata.step": "GromacsPull:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[117]:
12
In [118]:
res_dict = {}
failed_list = []

# pipeline: filter, newest first, keep latest file per parameter set,
# then order groups by descending molecule count
match_aggregation = {"$match": query}
sort_aggregation = {"$sort": {"metadata.datetime": pymongo.DESCENDING}}
group_aggregation = {
    "$group": {
        "_id": {field: '${}'.format(key) for field, key in parameter_dict.items()},
        "degeneracy": {"$sum": 1},  # number matching data sets
        "latest": {"$first": "$gfs_id"},  # unique gridfs id of newest file
    }
}
second_sort_aggregation = {"$sort": {"_id.nmolecules": pymongo.DESCENDING}}


aggregation_pipeline = [
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation]
cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, c in enumerate(cursor):
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data = np.loadtxt(io.StringIO(content.decode()), comments='#')
    # first row: distance bins; remaining rows: one RDF per frame
    res_dict[nmolecules] = {'dist': data[0], 'rdf': data[1:]}
    print('.', end='')
print('')
............
In [119]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = (n + cols - 1) // cols  # ceil division; round() can undersize the grid
if rows > 1:
    positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]

fig, ax = plt.subplots(rows, cols, figsize=(5*cols, 4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]  # single panel: allow positional indexing
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'], data['rdf'][0], label='First frame RDF')
    # BUGFIX: was data['rdf'][len(data)//2] — len(data) is the dict length (always 2),
    # which plotted frame 1 instead of the middle frame of the trajectory.
    ax[pos].plot(data['dist'], data['rdf'][len(data['rdf'])//2], label='Intermediate frame RDF')
    ax[pos].plot(data['dist'], data['rdf'][-1], label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
fig.show()

Energy minimization after solvation analysis

Overview on objects in step

In [120]:
# queries to the database are simple dictionaries;
# select all files pushed by the post-solvation energy-minimization step
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad'
}
In [121]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[121]:
240
In [122]:
# check files degenerate by 'metadata.type' and 'metadata.name'
aggregation_pipeline = [
    {"$match": query},
    {   # one bucket per unique (type, name) pair
        "$group": {
            "_id": {
                'type': '$metadata.type',
                'name': '$metadata.name',
            },
            "object_count": {"$sum": 1},  # data sets per bucket
            "earliest": {'$min': '$metadata.datetime'},
            "latest": {'$max': '$metadata.datetime'},
        },
    },
    {   # most recently started buckets first
        "$sort": {"earliest": pymongo.DESCENDING}
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# flatten each bucket's '_id' sub-document into the record itself
res = [{**doc['_id'], **doc} for doc in cursor]
res_df = pd.DataFrame(
    data=res,
    columns=['type', 'name', 'earliest', 'latest', 'object_count', '_id'])
del res_df["_id"]
In [123]:
res_df
Out[123]:
type name earliest latest object_count
0 mp4_file NaN 2020-07-19 22:53:00.082476 2020-07-19 22:53:01.300371 12
1 surfactant_tail_rmsd NaN 2020-07-19 22:53:00.082474 2020-07-19 22:53:01.300369 12
2 surfactant_head_rmsd NaN 2020-07-19 22:53:00.082471 2020-07-19 22:53:01.300367 12
3 substrate_rmsd NaN 2020-07-19 22:53:00.082469 2020-07-19 22:53:01.300364 12
4 counterion_rmsd NaN 2020-07-19 22:53:00.082467 2020-07-19 22:53:01.300362 12
5 surfactant_tail_surfactant_tail_rdf NaN 2020-07-19 22:53:00.082464 2020-07-19 22:53:01.300360 12
6 surfactant_head_surfactant_tail_rdf NaN 2020-07-19 22:53:00.082462 2020-07-19 22:53:01.300357 12
7 surfactant_head_surfactant_head_rdf NaN 2020-07-19 22:53:00.082460 2020-07-19 22:53:01.300355 12
8 substrate_surfactant_tail_rdf NaN 2020-07-19 22:53:00.082458 2020-07-19 22:53:01.300353 12
9 substrate_surfactant_head_rdf NaN 2020-07-19 22:53:00.082455 2020-07-19 22:53:01.300351 12
10 substrate_substrate_rdf NaN 2020-07-19 22:53:00.082453 2020-07-19 22:53:01.300348 12
11 counterion_surfactant_tail_rdf NaN 2020-07-19 22:53:00.082451 2020-07-19 22:53:01.300346 12
12 counterion_surfactant_head_rdf NaN 2020-07-19 22:53:00.082449 2020-07-19 22:53:01.300344 12
13 counterion_substrate_rdf NaN 2020-07-19 22:53:00.082446 2020-07-19 22:53:01.300341 12
14 counterion_counterion_rdf NaN 2020-07-19 22:53:00.082444 2020-07-19 22:53:01.300339 12
15 topology_file NaN 2020-07-19 22:53:00.082442 2020-07-19 22:53:01.300337 12
16 data_file NaN 2020-07-19 22:53:00.082439 2020-07-19 22:53:01.300334 12
17 trajectory_file NaN 2020-07-19 22:53:00.082437 2020-07-19 22:53:01.300332 12
18 energy_file NaN 2020-07-19 22:53:00.082434 2020-07-19 22:53:01.300329 12
19 log_file NaN 2020-07-19 22:53:00.082429 2020-07-19 22:53:01.300323 12

Global observables

In [124]:
# query: GROMACS .edr energy files of the post-solvation energy minimization
query = { 
    "metadata.project": project_id,
    'metadata.step': 'GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad',  #{'$regex': 'GromacsEnergyMinimization'}
    "metadata.type": 'energy_file',
}
fp.filepad.count_documents(query)
Out[124]:
12
In [125]:
# aggregation '_id' field name -> metadata key holding the parameter value
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [126]:
# NOTE(review): this cell appears redundant — the next cell rebuilds the identical
# pipeline and calls fp.filepad.aggregate again before the cursor created here is
# ever consumed. Consider removing this cell.
aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)
In [127]:
res_mi_list = []

aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, c in enumerate(cursor):
    content, metadata = fp.get_file_by_id(c["latest"])
    with tempfile.NamedTemporaryFile(suffix='.edr') as tmp:
        tmp.write(content)
        tmp.flush()  # panedr reads the file by name while the handle is open; unflushed buffers would truncate it
        em_df = panedr.edr_to_df(tmp.name)

        # index energy frames by (parameter values..., step)
        mi = pd.MultiIndex.from_product(
            [c["_id"].values(), em_df.index],
            names=[*c["_id"].keys(), 'step'])
        em_mi_df = em_df.set_index(mi)
        res_mi_list.append(em_mi_df)
    print('.', end='')
print('')

res_mi_df = pd.concat(res_mi_list)
res_df = res_mi_df.reset_index()
............
In [128]:
res_mi_df
Out[128]:
Time Bond U-B Proper Dih. LJ-14 Coulomb-14 LJ (SR) Coulomb (SR) ... Pres-YX Pres-YY Pres-YZ Pres-ZX Pres-ZY Pres-ZZ #Surf*SurfTen T-rest
nmolecules step
481 0.0 0.0 141.694092 40054.796875 14063.016602 3293.846924 124254.132812 935492.687500 -3714741.75 ... 38.823322 -281.488861 -34.370895 62.575344 -34.370895 -278.940094 -0.0 0.0
1.0 1.0 141.661545 40054.625000 14063.023438 3294.069824 124254.476562 933008.437500 -3714762.75 ... 38.673538 -281.520721 -34.422112 62.373291 -34.422123 -279.148682 -0.0 0.0
2.0 2.0 141.649338 40054.390625 14063.034180 3294.339600 124254.914062 930034.500000 -3714788.00 ... 38.493793 -281.558563 -34.479385 62.131145 -34.479393 -279.397156 -0.0 0.0
3.0 3.0 141.683838 40054.140625 14063.042969 3294.667725 124255.429688 926463.375000 -3714819.00 ... 38.278286 -281.602325 -34.545727 61.843330 -34.545715 -279.693451 -0.0 0.0
4.0 4.0 141.793335 40053.808594 14063.057617 3295.075684 124256.093750 922176.000000 -3714856.50 ... 38.020149 -281.654419 -34.619755 61.499599 -34.619751 -280.043854 -0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
88 9994.0 9994.0 99.860451 6682.258301 2362.781250 559.270020 22817.695312 -91124.109375 -4491545.50 ... -10.067997 -402.108795 6.482723 12.649333 6.482725 -396.173065 0.0 0.0
9995.0 9995.0 101.397034 6682.396973 2362.779053 560.352173 22818.513672 -91123.562500 -4491552.50 ... -10.068232 -402.109344 6.482833 12.649398 6.482831 -396.173828 0.0 0.0
9997.0 9997.0 96.772743 6681.637207 2362.777588 559.687805 22818.021484 -91122.265625 -4491554.00 ... -10.068281 -402.109894 6.482832 12.649323 6.482830 -396.173920 0.0 0.0
9998.0 9998.0 103.469955 6682.873047 2362.764404 560.463501 22818.607422 -91115.492188 -4491580.50 ... -10.069194 -402.112305 6.482836 12.649473 6.482837 -396.177673 0.0 0.0
9999.0 9999.0 97.318970 6681.770020 2362.762695 559.520203 22817.896484 -91113.968750 -4491582.00 ... -10.069223 -402.112488 6.482925 12.649437 6.482922 -396.177856 0.0 0.0

95071 rows × 33 columns

In [129]:
# global observables from the energy file: one panel per quantity
y_quantities = [
    'Potential',
    'Pressure',
    'Bond',
    'Coulomb (SR)',
    'Coul. recip.',
    ]

positions = [
    (0,0),
    (0,1),
    (1,0),
    (2,0),
    (2,1),
]
fig, ax = plt.subplots(3, 2, figsize=(10, 12))
# fill panel by panel; each panel gets one curve per system (nmolecules)
for y_quantity, position in zip(y_quantities, positions):
    for key, grp in res_df.groupby(['nmolecules']):
        grp.plot('Time', y_quantity, ax=ax[position], label=key, title=y_quantity)

fig.tight_layout()

Visualize trajectory

In [130]:
# query: rendered trajectory videos of the post-solvation energy minimization
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad',
    'metadata.type': 'mp4_file',
}
In [131]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[131]:
12
In [132]:
# check: one video per system (nmolecules)

aggregation_pipeline = [
    {"$match": query},
    {   # one bucket per unique surfactant count
        "$group": {
            "_id": {
                'nmolecules': '$metadata.system.surfactant.nmolecules'
            },
            "object_count": {"$sum": 1},  # data sets per bucket
            "earliest": {'$min': '$metadata.datetime'},
            "latest": {'$max': '$metadata.datetime'},
        },
    },
    {   # most recently started buckets first
        "$sort": {"earliest": pymongo.DESCENDING}
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# flatten each bucket's '_id' sub-document into the record itself
res = [{**doc['_id'], **doc} for doc in cursor]
res_df = pd.DataFrame(
    data=res,
    columns=['nmolecules', 'name', 'earliest', 'latest', 'object_count', '_id'])
del res_df["_id"]
In [133]:
res_df
Out[133]:
nmolecules name earliest latest object_count
0 525 NaN 2020-07-19 22:53:01.300371 2020-07-19 22:53:01.300371 1
1 481 NaN 2020-07-19 22:53:01.193994 2020-07-19 22:53:01.193994 1
2 438 NaN 2020-07-19 22:53:01.089856 2020-07-19 22:53:01.089856 1
3 394 NaN 2020-07-19 22:53:00.973687 2020-07-19 22:53:00.973687 1
4 350 NaN 2020-07-19 22:53:00.864885 2020-07-19 22:53:00.864885 1
5 306 NaN 2020-07-19 22:53:00.754997 2020-07-19 22:53:00.754997 1
6 263 NaN 2020-07-19 22:53:00.640411 2020-07-19 22:53:00.640411 1
7 219 NaN 2020-07-19 22:53:00.530289 2020-07-19 22:53:00.530289 1
8 175 NaN 2020-07-19 22:53:00.415275 2020-07-19 22:53:00.415275 1
9 131 NaN 2020-07-19 22:53:00.303640 2020-07-19 22:53:00.303640 1
10 88 NaN 2020-07-19 22:53:00.190886 2020-07-19 22:53:00.190886 1
11 44 NaN 2020-07-19 22:53:00.082476 2020-07-19 22:53:00.082476 1
In [134]:
aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_dict = {}
for i, c in enumerate(cursor):
    content, metadata = fp.get_file_by_id(c["latest"])
    with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp:
        tmp.write(content)
        tmp.flush()  # file is read by name below while still open; unflushed buffers would truncate it
        obj_dict.update({metadata['metadata']['system']['surfactant']['nmolecules']: Video.from_file(tmp.name)})
    # Video.from_file loads the content into the widget, so the temp file can go
    # (previously leaked: delete=False with no unlink).
    os.unlink(tmp.name)
    print('.', end='')
............
In [135]:
# render each video widget inline, preceded by its system size
for key, obj in obj_dict.items():
    print(key)
    display(obj)
525
394
438
350
263
175
131
44
306
88
481
219

NVT equilibration analysis

Overview on objects in step

In [136]:
# queries to the database are simple dictionaries;
# select all files pushed by the NVT equilibration step
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad'
}
In [137]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[137]:
252
In [138]:
# check files degenerate by 'metadata.type' and 'metadata.name'
aggregation_pipeline = [
    {"$match": query},
    {   # one bucket per unique (type, name) pair
        "$group": {
            "_id": {
                'type': '$metadata.type',
                'name': '$metadata.name',
            },
            "object_count": {"$sum": 1},  # data sets per bucket
            "earliest": {'$min': '$metadata.datetime'},
            "latest": {'$max': '$metadata.datetime'},
        },
    },
    {   # most recently started buckets first
        "$sort": {"earliest": pymongo.DESCENDING}
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# flatten each bucket's '_id' sub-document into the record itself
res = [{**doc['_id'], **doc} for doc in cursor]
res_df = pd.DataFrame(
    data=res,
    columns=['type', 'name', 'earliest', 'latest', 'object_count', '_id'])
del res_df["_id"]
In [139]:
res_df
Out[139]:
type name earliest latest object_count
0 mp4_file NaN 2020-07-19 22:53:00.094277 2020-07-19 22:53:01.311597 12
1 surfactant_tail_rmsd NaN 2020-07-19 22:53:00.094274 2020-07-19 22:53:01.311594 12
2 surfactant_head_rmsd NaN 2020-07-19 22:53:00.094272 2020-07-19 22:53:01.311588 12
3 substrate_rmsd NaN 2020-07-19 22:53:00.094270 2020-07-19 22:53:01.311585 12
4 counterion_rmsd NaN 2020-07-19 22:53:00.094267 2020-07-19 22:53:01.311583 12
5 surfactant_tail_surfactant_tail_rdf NaN 2020-07-19 22:53:00.094265 2020-07-19 22:53:01.311580 12
6 surfactant_head_surfactant_tail_rdf NaN 2020-07-19 22:53:00.094262 2020-07-19 22:53:01.311578 12
7 surfactant_head_surfactant_head_rdf NaN 2020-07-19 22:53:00.094260 2020-07-19 22:53:01.311575 12
8 substrate_surfactant_tail_rdf NaN 2020-07-19 22:53:00.094258 2020-07-19 22:53:01.311573 12
9 substrate_surfactant_head_rdf NaN 2020-07-19 22:53:00.094255 2020-07-19 22:53:01.311571 12
10 substrate_substrate_rdf NaN 2020-07-19 22:53:00.094253 2020-07-19 22:53:01.311568 12
11 counterion_surfactant_tail_rdf NaN 2020-07-19 22:53:00.094251 2020-07-19 22:53:01.311566 12
12 counterion_surfactant_head_rdf NaN 2020-07-19 22:53:00.094248 2020-07-19 22:53:01.311563 12
13 counterion_substrate_rdf NaN 2020-07-19 22:53:00.094246 2020-07-19 22:53:01.311561 12
14 counterion_counterion_rdf NaN 2020-07-19 22:53:00.094244 2020-07-19 22:53:01.311558 12
15 index_file NaN 2020-07-19 22:53:00.094241 2020-07-19 22:53:01.311556 12
16 topology_file NaN 2020-07-19 22:53:00.094239 2020-07-19 22:53:01.311553 12
17 data_file NaN 2020-07-19 22:53:00.094236 2020-07-19 22:53:01.311551 12
18 trajectory_file NaN 2020-07-19 22:53:00.094234 2020-07-19 22:53:01.311548 12
19 energy_file NaN 2020-07-19 22:53:00.094231 2020-07-19 22:53:01.311545 12
20 log_file NaN 2020-07-19 22:53:00.094225 2020-07-19 22:53:01.311533 12

Global observables

In [140]:
# restrict to the Gromacs energy (.edr) files of the NVT equilibration step
query = {"metadata.project": project_id}
query['metadata.step'] = 'GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad'
query["metadata.type"] = 'energy_file'
fp.filepad.count_documents(query)
Out[140]:
12
In [141]:
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [142]:
# For every parameter set keep only the newest file: sort by datetime
# (descending) first, so "$first" in the group stage picks the most
# recent gridfs id per parametric key.
match_stage = {"$match": query}
time_sort_stage = {"$sort": {"metadata.datetime": pymongo.DESCENDING}}
latest_group_stage = {
    "$group": {
        "_id": {field: '${}'.format(key) for field, key in parameter_dict.items()},
        "degeneracy": {"$sum": 1},        # how many documents share this parameter set
        "latest":     {"$first": "$gfs_id"},  # unique gridfs id of the newest file
    }
}
aggregation_pipeline = [match_stage, time_sort_stage, latest_group_stage]

cursor = fp.filepad.aggregate(aggregation_pipeline)
In [143]:
[ c for c in cursor]
Out[143]:
[{'_id': {'nmolecules': 481},
  'degeneracy': 1,
  'latest': '5f14c8957dc9cfbf449d3b16'},
 {'_id': {'nmolecules': 438},
  'degeneracy': 1,
  'latest': '5f14c9497dc9cfbf449d47c2'},
 {'_id': {'nmolecules': 394},
  'degeneracy': 1,
  'latest': '5f14cf9c7dc9cfbf449d7ac3'},
 {'_id': {'nmolecules': 350},
  'degeneracy': 1,
  'latest': '5f14dcd67dc9cfbf449de717'},
 {'_id': {'nmolecules': 263},
  'degeneracy': 1,
  'latest': '5f14eb837dc9cfbf449e4201'},
 {'_id': {'nmolecules': 219},
  'degeneracy': 1,
  'latest': '5f14f7ab7dc9cfbf449e9617'},
 {'_id': {'nmolecules': 175},
  'degeneracy': 1,
  'latest': '5f14fdb97dc9cfbf449ed4ea'},
 {'_id': {'nmolecules': 131},
  'degeneracy': 1,
  'latest': '5f14feb17dc9cfbf449ee20f'},
 {'_id': {'nmolecules': 306},
  'degeneracy': 1,
  'latest': '5f14e3b77dc9cfbf449e081b'},
 {'_id': {'nmolecules': 44},
  'degeneracy': 1,
  'latest': '5f1510107dc9cfbf449f6c6f'},
 {'_id': {'nmolecules': 525},
  'degeneracy': 1,
  'latest': '5f14c75c7dc9cfbf449d277d'},
 {'_id': {'nmolecules': 88},
  'degeneracy': 1,
  'latest': '5f150bf77dc9cfbf449f2e82'}]
In [144]:
res_list = []

# newest energy file per parameter set (same pipeline shape as above)
aggregation_pipeline = [
    {"$match": query},
    {"$sort": {"metadata.datetime": pymongo.DESCENDING}},
    {
        "$group": {
            "_id": {field: '${}'.format(key) for field, key in parameter_dict.items()},
            "degeneracy": {"$sum": 1},  # number of matching data sets
            "latest": {"$first": "$gfs_id"},  # unique gridfs id of file
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    with tempfile.NamedTemporaryFile(suffix='.edr') as tmp:
        tmp.write(content)
        tmp.flush()  # bugfix: flush buffered bytes so panedr reads the complete file
        res_df = panedr.edr_to_df(tmp.name)

        # prepend the parameter value(s) as extra index level(s) above 'step'
        mi = pd.MultiIndex.from_product(
            [c["_id"].values(), res_df.index],
            names=[*c["_id"].keys(), 'step'])
        res_mi_df = res_df.set_index(mi)
        res_list.append(res_mi_df)
    print('.', end='')
print('')
res_df_mi = pd.concat(res_list)
res_df = res_df_mi.reset_index()
............
In [145]:
res_df.columns
Out[145]:
Index(['nmolecules', 'step', 'Time', 'Bond', 'U-B', 'Proper Dih.', 'LJ-14',
       'Coulomb-14', 'LJ (SR)', 'Coulomb (SR)', 'Coul. recip.',
       'Position Rest.', 'Potential', 'Kinetic En.', 'Total Energy',
       'Conserved En.', 'Temperature', 'Pressure', 'Constr. rmsd', 'Vir-XX',
       'Vir-XY', 'Vir-XZ', 'Vir-YX', 'Vir-YY', 'Vir-YZ', 'Vir-ZX', 'Vir-ZY',
       'Vir-ZZ', 'Pres-XX', 'Pres-XY', 'Pres-XZ', 'Pres-YX', 'Pres-YY',
       'Pres-YZ', 'Pres-ZX', 'Pres-ZY', 'Pres-ZZ', '#Surf*SurfTen',
       'Coul-SR:Surfactant-Surfactant', 'LJ-SR:Surfactant-Surfactant',
       'Coul-14:Surfactant-Surfactant', 'LJ-14:Surfactant-Surfactant',
       'Coul-SR:Surfactant-Substrate', 'LJ-SR:Surfactant-Substrate',
       'Coul-14:Surfactant-Substrate', 'LJ-14:Surfactant-Substrate',
       'Coul-SR:Surfactant-Ion', 'LJ-SR:Surfactant-Ion',
       'Coul-14:Surfactant-Ion', 'LJ-14:Surfactant-Ion',
       'Coul-SR:Surfactant-rest', 'LJ-SR:Surfactant-rest',
       'Coul-14:Surfactant-rest', 'LJ-14:Surfactant-rest',
       'Coul-SR:Substrate-Substrate', 'LJ-SR:Substrate-Substrate',
       'Coul-14:Substrate-Substrate', 'LJ-14:Substrate-Substrate',
       'Coul-SR:Substrate-Ion', 'LJ-SR:Substrate-Ion', 'Coul-14:Substrate-Ion',
       'LJ-14:Substrate-Ion', 'Coul-SR:Substrate-rest', 'LJ-SR:Substrate-rest',
       'Coul-14:Substrate-rest', 'LJ-14:Substrate-rest', 'Coul-SR:Ion-Ion',
       'LJ-SR:Ion-Ion', 'Coul-14:Ion-Ion', 'LJ-14:Ion-Ion', 'Coul-SR:Ion-rest',
       'LJ-SR:Ion-rest', 'Coul-14:Ion-rest', 'LJ-14:Ion-rest',
       'Coul-SR:rest-rest', 'LJ-SR:rest-rest', 'Coul-14:rest-rest',
       'LJ-14:rest-rest', 'T-non-Substrate', 'T-Substrate',
       'Lamb-non-Substrate', 'Lamb-Substrate'],
      dtype='object')
In [146]:
res_df_mi
Out[146]:
Time Bond U-B Proper Dih. LJ-14 Coulomb-14 LJ (SR) Coulomb (SR) ... Coul-SR:rest-rest LJ-SR:rest-rest Coul-14:rest-rest LJ-14:rest-rest T-non-Substrate T-Substrate Lamb-non-Substrate Lamb-Substrate
nmolecules step
481 0.0 0.0 733.488892 36992.550781 13402.288086 3134.245850 124551.203125 -148020.031250 -4553320.50 ... -4130818.75 480839.90625 0.0 0.0 298.916260 0.0 1.0 1.0
1.0 1.0 6369.137695 64824.730469 16620.417969 4206.083008 124065.750000 -205432.328125 -4107370.50 ... -3678724.50 420730.21875 0.0 0.0 206.726028 0.0 1.0 1.0
2.0 2.0 7395.747070 69906.039062 17395.628906 4475.372559 123771.125000 -224666.703125 -4011863.50 ... -3578490.75 400638.96875 0.0 0.0 238.717941 0.0 1.0 1.0
3.0 3.0 7853.290527 73333.234375 17740.519531 4637.836426 123693.906250 -247252.812500 -3930259.75 ... -3496063.25 378916.96875 0.0 0.0 256.919708 0.0 1.0 1.0
4.0 4.0 8203.024414 75309.648438 17998.080078 4786.134766 123690.250000 -257208.343750 -3876227.50 ... -3439610.00 368469.34375 0.0 0.0 269.385254 0.0 1.0 1.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
88 96.0 96.0 1538.663696 13889.666016 3269.358887 872.792786 22596.740234 -260288.562500 -3574433.75 ... -3483944.00 348176.00000 0.0 0.0 296.900513 0.0 1.0 1.0
97.0 97.0 1510.632324 13696.390625 3206.569580 860.664246 22673.849609 -261339.484375 -3576860.75 ... -3486768.75 347314.34375 0.0 0.0 298.103668 0.0 1.0 1.0
98.0 98.0 1533.184814 13870.621094 3201.865967 866.572021 22687.105469 -261641.171875 -3574124.50 ... -3483832.00 347003.90625 0.0 0.0 297.654694 0.0 1.0 1.0
99.0 99.0 1587.037842 13696.195312 3265.381104 871.215820 22692.685547 -261064.265625 -3576364.50 ... -3485767.25 347491.00000 0.0 0.0 298.297089 0.0 1.0 1.0
100.0 100.0 1644.579956 13734.527344 3153.703125 845.706726 22697.308594 -261413.187500 -3575528.25 ... -3485915.25 347481.93750 0.0 0.0 299.450104 0.0 1.0 1.0

1212 rows × 80 columns

In [147]:
# Plot selected global observables over time: one panel per quantity,
# one curve per system size (nmolecules).
y_quantities = [
    'Temperature',
    'Pressure',
    'Potential',
    'Bond',
    'Coulomb (SR)',
    'Coul. recip.',
    ]

n = len(y_quantities)
cols = 2
# ceiling division; round(n/cols) silently dropped panels for some odd n
rows = -(-n // cols)
if rows > 1:
    positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]

fig, ax = plt.subplots(rows, cols, figsize=(5*cols, 4*rows))
for key, grp in res_df.groupby(['nmolecules']):
    for y_quantity, position in zip(y_quantities, positions):
        grp.plot('Time', y_quantity, ax=ax[position], label=key, title=y_quantity)

fig.tight_layout()

Visualize trajectory

In [148]:
# trajectory renderings (.mp4) of the NVT equilibration step
query = {'metadata.project': project_id}
query['metadata.step'] = 'GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad'
query['metadata.type'] = 'mp4_file'
In [149]:
# Use the underlying MongoDB collection directly to count all documents
# matching the query (one video file expected per parameter set).
fp.filepad.count_documents(query)
Out[149]:
12
In [150]:
# maps dataframe column name -> metadata field that distinguishes parameter sets
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}

# newest mp4 file per parameter set
aggregation_pipeline = [
    {"$match": query},
    {"$sort": {"metadata.datetime": pymongo.DESCENDING}},
    {
        "$group": {
            "_id": {field: '${}'.format(key) for field, key in parameter_dict.items()},
            "degeneracy": {"$sum": 1},  # number of matching data sets
            "latest": {"$first": "$gfs_id"},  # unique gridfs id of file
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# map parameter tuple -> Video widget
obj_dict = {}
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    # delete=False: the file must outlive this context manager so the Video
    # widget can still stream it when rendered later in the notebook
    with tempfile.NamedTemporaryFile(suffix='.mp4',delete=False) as tmp:
        tmp.write(content)
        tmp.flush()  # bugfix: flush buffered bytes before reading the file by name
        key = tuple(c["_id"].values())
        obj_dict.update({key: Video.from_file(tmp.name)})
    print('.',end='')
............
In [151]:
# show each trajectory video, labelled by its parameter tuple
for label, video in obj_dict.items():
    print(label)
    display(video)
(525,)
(394,)
(438,)
(350,)
(263,)
(175,)
(131,)
(44,)
(306,)
(88,)
(481,)
(219,)

NPT equilibration analysis

Datasets in step

In [152]:
# queries to the data base are simple dictionaries
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad'
}
In [153]:
# Group files by the dtool dataset they were pushed to, yielding one row
# per remote dataset with object count and covered date range.
dataset_group_stage = {
    "$group": {
        "_id": {
            'dataset': '$metadata.step_specific.dtool_push.remote_dataset',
        },
        "object_count": {"$sum": 1},  # count matching data sets
        "earliest": {'$min': '$metadata.datetime'},
        "latest": {'$max': '$metadata.datetime'},
    },
}
date_sort_stage = {  # sort by earliest date, descending
    "$sort": {"earliest": pymongo.DESCENDING}
}
aggregation_pipeline = [{"$match": query}, dataset_group_stage, date_sort_stage]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# expand the nested dataset document (uuid, name, uri) into top-level columns
res = [{**c['_id']['dataset'], **c} for c in cursor]
res_df = pd.DataFrame(data=res, columns=['uuid', 'name', 'uri', 'earliest', 'latest', 'object_count', '_id'])
del res_df["_id"]
In [154]:
res_df
Out[154]:
uuid name uri earliest latest object_count
0 fab5395e-d1a2-46c8-96da-035cb7a5a8bf 2020-07-19-22-53-01-317367-n-525-m-525-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-01-317367-n-525-m-525-gromacsnptequilibration 2020-07-19 22:53:01.322963 2020-07-19 22:53:01.323013 21
1 e3b6a546-e189-4db9-a07b-3740d640670c 2020-07-19-22-53-01-211770-n-481-m-481-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-01-211770-n-481-m-481-gromacsnptequilibration 2020-07-19 22:53:01.216982 2020-07-19 22:53:01.217035 21
2 0bc96782-99e4-4a02-b247-d40002781c0a 2020-07-19-22-53-01-108983-n-438-m-438-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-01-108983-n-438-m-438-gromacsnptequilibration 2020-07-19 22:53:01.114240 2020-07-19 22:53:01.114294 21
3 db86544d-4056-4f0f-91d0-c93447ff6805 2020-07-19-22-53-00-992464-n-394-m-394-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-992464-n-394-m-394-gromacsnptequilibration 2020-07-19 22:53:00.998190 2020-07-19 22:53:00.998308 21
4 89555ade-154b-48ac-a338-2151715af8df 2020-07-19-22-53-00-883201-n-350-m-350-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-883201-n-350-m-350-gromacsnptequilibration 2020-07-19 22:53:00.888952 2020-07-19 22:53:00.889014 21
5 b8b840d0-6f04-414d-8383-4123b273cc8d 2020-07-19-22-53-00-772561-n-306-m-306-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-772561-n-306-m-306-gromacsnptequilibration 2020-07-19 22:53:00.778654 2020-07-19 22:53:00.778710 21
6 16e29772-082e-4661-978f-643566753a36 2020-07-19-22-53-00-658358-n-263-m-263-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-658358-n-263-m-263-gromacsnptequilibration 2020-07-19 22:53:00.665453 2020-07-19 22:53:00.665552 21
7 cc81eb50-8ab8-46c2-ac34-46afe79b7417 2020-07-19-22-53-00-549951-n-219-m-219-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-549951-n-219-m-219-gromacsnptequilibration 2020-07-19 22:53:00.555606 2020-07-19 22:53:00.555659 21
8 c4df4417-9051-493f-9187-55a6812ec6da 2020-07-19-22-53-00-433112-n-175-m-175-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-433112-n-175-m-175-gromacsnptequilibration 2020-07-19 22:53:00.438717 2020-07-19 22:53:00.438769 21
9 8ec3c13e-14e0-481f-a453-401b3e284b0f 2020-07-19-22-53-00-320961-n-131-m-131-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-320961-n-131-m-131-gromacsnptequilibration 2020-07-19 22:53:00.326980 2020-07-19 22:53:00.327036 57
10 67a41690-a7c7-4e9e-9d5d-9bec086f0a21 2020-07-19-22-53-00-208131-n-88-m-88-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-208131-n-88-m-88-gromacsnptequilibration 2020-07-19 22:53:00.213505 2020-07-19 22:53:00.213560 21
11 320f292a-ea65-47b1-a83f-ffb5df0f648c 2020-07-19-22-53-00-100083-n-44-m-44-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-100083-n-44-m-44-gromacsnptequilibration 2020-07-19 22:53:00.105618 2020-07-19 22:53:00.105677 21

Overview on objects in step

In [155]:
# queries to the data base are simple dictionaries
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad'
}
In [156]:
# Use the underlying MongoDB collection directly to count all documents
# matching the query (sanity check before aggregating).
fp.filepad.count_documents(query)
Out[156]:
288
In [157]:
# Check for files degenerate by 'metadata.type' and 'metadata.name':
# group on both keys, counting objects and the covered date range per group.
type_name_group_stage = {
    "$group": {
        "_id": {
            'type': '$metadata.type',
            'name': '$metadata.name',
        },
        "object_count": {"$sum": 1},  # count matching data sets
        "earliest": {'$min': '$metadata.datetime'},
        "latest": {'$max': '$metadata.datetime'},
    },
}
date_sort_stage = {  # sort by earliest date, descending
    "$sort": {"earliest": pymongo.DESCENDING}
}
aggregation_pipeline = [{"$match": query}, type_name_group_stage, date_sort_stage]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# flatten the grouped '_id' sub-document into top-level columns for display
res = [{**c['_id'], **c} for c in cursor]
res_df = pd.DataFrame(data=res, columns=['type', 'name', 'earliest', 'latest', 'object_count', '_id'])
del res_df["_id"]
In [158]:
res_df
Out[158]:
type name earliest latest object_count
0 mp4_file NaN 2020-07-19 22:53:00.105677 2020-07-19 22:53:01.323013 13
1 surfactant_tail_rmsd NaN 2020-07-19 22:53:00.105675 2020-07-19 22:53:01.323011 13
2 surfactant_head_rmsd NaN 2020-07-19 22:53:00.105671 2020-07-19 22:53:01.323009 13
3 substrate_rmsd NaN 2020-07-19 22:53:00.105667 2020-07-19 22:53:01.323006 13
4 counterion_rmsd NaN 2020-07-19 22:53:00.105660 2020-07-19 22:53:01.323004 13
5 surfactant_tail_surfactant_tail_rdf NaN 2020-07-19 22:53:00.105657 2020-07-19 22:53:01.323001 13
6 surfactant_head_surfactant_tail_rdf NaN 2020-07-19 22:53:00.105655 2020-07-19 22:53:01.322999 14
7 surfactant_head_surfactant_head_rdf NaN 2020-07-19 22:53:00.105653 2020-07-19 22:53:01.322997 14
8 substrate_surfactant_tail_rdf NaN 2020-07-19 22:53:00.105650 2020-07-19 22:53:01.322995 14
9 substrate_surfactant_head_rdf NaN 2020-07-19 22:53:00.105648 2020-07-19 22:53:01.322992 14
10 substrate_substrate_rdf NaN 2020-07-19 22:53:00.105646 2020-07-19 22:53:01.322990 14
11 counterion_surfactant_tail_rdf NaN 2020-07-19 22:53:00.105643 2020-07-19 22:53:01.322988 14
12 counterion_surfactant_head_rdf NaN 2020-07-19 22:53:00.105641 2020-07-19 22:53:01.322986 14
13 counterion_substrate_rdf NaN 2020-07-19 22:53:00.105638 2020-07-19 22:53:01.322983 14
14 counterion_counterion_rdf NaN 2020-07-19 22:53:00.105636 2020-07-19 22:53:01.322981 14
15 index_file NaN 2020-07-19 22:53:00.105634 2020-07-19 22:53:01.322979 14
16 topology_file NaN 2020-07-19 22:53:00.105631 2020-07-19 22:53:01.322976 14
17 data_file NaN 2020-07-19 22:53:00.105629 2020-07-19 22:53:01.322974 14
18 trajectory_file NaN 2020-07-19 22:53:00.105626 2020-07-19 22:53:01.322972 14
19 energy_file NaN 2020-07-19 22:53:00.105623 2020-07-19 22:53:01.322969 14
20 log_file NaN 2020-07-19 22:53:00.105618 2020-07-19 22:53:01.322963 14

Global observables

In [159]:
# restrict to the Gromacs energy (.edr) files of the NPT equilibration step
query = {"metadata.project": project_id}
query['metadata.step'] = 'GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad'
query["metadata.type"] = 'energy_file'
fp.filepad.count_documents(query)
Out[159]:
14
In [160]:
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [161]:
# For every parameter set keep only the newest file: sort by datetime
# (descending) first, so "$first" in the group stage picks the most
# recent gridfs id per parametric key.
match_stage = {"$match": query}
time_sort_stage = {"$sort": {"metadata.datetime": pymongo.DESCENDING}}
latest_group_stage = {
    "$group": {
        "_id": {field: '${}'.format(key) for field, key in parameter_dict.items()},
        "degeneracy": {"$sum": 1},        # how many documents share this parameter set
        "latest":     {"$first": "$gfs_id"},  # unique gridfs id of the newest file
    }
}
aggregation_pipeline = [match_stage, time_sort_stage, latest_group_stage]

cursor = fp.filepad.aggregate(aggregation_pipeline)
In [162]:
[ c for c in cursor]
Out[162]:
[{'_id': {'nmolecules': 175},
  'degeneracy': 1,
  'latest': '5f1503d87dc9cfbf449ef012'},
 {'_id': {'nmolecules': 394},
  'degeneracy': 1,
  'latest': '5f14db6e7dc9cfbf449dda53'},
 {'_id': {'nmolecules': 350},
  'degeneracy': 1,
  'latest': '5f14e3157dc9cfbf449dfb5a'},
 {'_id': {'nmolecules': 438},
  'degeneracy': 1,
  'latest': '5f14cf097dc9cfbf449d6dfd'},
 {'_id': {'nmolecules': 263},
  'degeneracy': 1,
  'latest': '5f14f0e07dc9cfbf449e6877'},
 {'_id': {'nmolecules': 131},
  'degeneracy': 3,
  'latest': '5f15046d7dc9cfbf449efcbc'},
 {'_id': {'nmolecules': 88},
  'degeneracy': 1,
  'latest': '5f1511727dc9cfbf449f78cd'},
 {'_id': {'nmolecules': 44},
  'degeneracy': 1,
  'latest': '5f1515997dc9cfbf449f8548'},
 {'_id': {'nmolecules': 306},
  'degeneracy': 1,
  'latest': '5f14e9d87dc9cfbf449e34cf'},
 {'_id': {'nmolecules': 481},
  'degeneracy': 1,
  'latest': '5f14ce767dc9cfbf449d613e'},
 {'_id': {'nmolecules': 219},
  'degeneracy': 1,
  'latest': '5f14fd247dc9cfbf449ec831'},
 {'_id': {'nmolecules': 525},
  'degeneracy': 1,
  'latest': '5f14ccfd7dc9cfbf449d5474'}]
In [163]:
res_list = []

# re-run the pipeline defined above (aggregation cursors are single-use)
cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    with tempfile.NamedTemporaryFile(suffix='.edr') as tmp:
        tmp.write(content)
        tmp.flush()  # bugfix: flush buffered bytes so panedr reads the complete file
        res_df = panedr.edr_to_df(tmp.name)

        # prepend the parameter value(s) as extra index level(s) above 'step'
        mi = pd.MultiIndex.from_product(
            [c["_id"].values(), res_df.index],
            names=[*c["_id"].keys(), 'step'])
        res_mi_df = res_df.set_index(mi)
        res_list.append(res_mi_df)
    print('.', end='')
print('')
res_df_mi = pd.concat(res_list)
res_df = res_df_mi.reset_index()
............
In [164]:
res_df.columns
Out[164]:
Index(['nmolecules', 'step', 'Time', 'Bond', 'U-B', 'Proper Dih.', 'LJ-14',
       'Coulomb-14', 'LJ (SR)', 'Coulomb (SR)', 'Coul. recip.',
       'Position Rest.', 'Potential', 'Kinetic En.', 'Total Energy',
       'Conserved En.', 'Temperature', 'Pressure', 'Constr. rmsd', 'Box-X',
       'Box-Y', 'Box-Z', 'Volume', 'Density', 'pV', 'Enthalpy', 'Vir-XX',
       'Vir-XY', 'Vir-XZ', 'Vir-YX', 'Vir-YY', 'Vir-YZ', 'Vir-ZX', 'Vir-ZY',
       'Vir-ZZ', 'Pres-XX', 'Pres-XY', 'Pres-XZ', 'Pres-YX', 'Pres-YY',
       'Pres-YZ', 'Pres-ZX', 'Pres-ZY', 'Pres-ZZ', '#Surf*SurfTen',
       'Coul-SR:Surfactant-Surfactant', 'LJ-SR:Surfactant-Surfactant',
       'Coul-14:Surfactant-Surfactant', 'LJ-14:Surfactant-Surfactant',
       'Coul-SR:Surfactant-Substrate', 'LJ-SR:Surfactant-Substrate',
       'Coul-14:Surfactant-Substrate', 'LJ-14:Surfactant-Substrate',
       'Coul-SR:Surfactant-Ion', 'LJ-SR:Surfactant-Ion',
       'Coul-14:Surfactant-Ion', 'LJ-14:Surfactant-Ion',
       'Coul-SR:Surfactant-rest', 'LJ-SR:Surfactant-rest',
       'Coul-14:Surfactant-rest', 'LJ-14:Surfactant-rest',
       'Coul-SR:Substrate-Substrate', 'LJ-SR:Substrate-Substrate',
       'Coul-14:Substrate-Substrate', 'LJ-14:Substrate-Substrate',
       'Coul-SR:Substrate-Ion', 'LJ-SR:Substrate-Ion', 'Coul-14:Substrate-Ion',
       'LJ-14:Substrate-Ion', 'Coul-SR:Substrate-rest', 'LJ-SR:Substrate-rest',
       'Coul-14:Substrate-rest', 'LJ-14:Substrate-rest', 'Coul-SR:Ion-Ion',
       'LJ-SR:Ion-Ion', 'Coul-14:Ion-Ion', 'LJ-14:Ion-Ion', 'Coul-SR:Ion-rest',
       'LJ-SR:Ion-rest', 'Coul-14:Ion-rest', 'LJ-14:Ion-rest',
       'Coul-SR:rest-rest', 'LJ-SR:rest-rest', 'Coul-14:rest-rest',
       'LJ-14:rest-rest', 'T-non-Substrate', 'T-Substrate',
       'Lamb-non-Substrate', 'Lamb-Substrate'],
      dtype='object')
In [165]:
res_df_mi
Out[165]:
Time Bond U-B Proper Dih. LJ-14 Coulomb-14 LJ (SR) Coulomb (SR) ... Coul-SR:rest-rest LJ-SR:rest-rest Coul-14:rest-rest LJ-14:rest-rest T-non-Substrate T-Substrate Lamb-non-Substrate Lamb-Substrate
nmolecules step
88 0.0 0.0 1659.547363 13766.999023 3154.668701 847.508911 22697.826172 -261171.90625 -3575470.75 ... -3485865.50 347726.15625 0.0 0.0 300.657349 1.699567 1.0 1.0
1.0 1.0 1572.175049 13818.107422 3189.248047 898.012146 22620.173828 -289901.28125 -3584997.75 ... -3493904.50 353124.56250 0.0 0.0 299.429932 260.991699 1.0 1.0
2.0 2.0 1571.472168 13731.670898 3223.597656 834.129395 22589.814453 -299930.93750 -3582234.75 ... -3491849.75 350642.40625 0.0 0.0 297.920380 186.032990 1.0 1.0
3.0 3.0 1528.322388 13704.244141 3289.937256 821.232178 22569.693359 -299907.75000 -3591731.00 ... -3500881.75 353351.28125 0.0 0.0 299.207397 121.649498 1.0 1.0
4.0 4.0 1492.993530 13962.540039 3207.214111 857.790710 22536.570312 -305155.71875 -3589980.25 ... -3499615.50 349995.18750 0.0 0.0 298.351257 94.199387 1.0 1.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
131 96.0 96.0 2148.888672 19903.812500 4750.384277 1186.712402 33476.066406 -309121.59375 -3652308.50 ... -3518272.50 354399.09375 0.0 0.0 296.655182 41.124649 1.0 1.0
97.0 97.0 2208.041504 20024.515625 4735.063477 1237.817749 33407.867188 -304207.53125 -3658998.00 ... -3524644.00 358882.93750 0.0 0.0 296.436707 42.647045 1.0 1.0
98.0 98.0 2216.830566 19570.035156 4817.375000 1288.840820 33677.285156 -305323.28125 -3656527.25 ... -3522077.00 357743.59375 0.0 0.0 296.995514 41.929428 1.0 1.0
99.0 99.0 2100.892090 19829.517578 4768.525391 1267.084839 33546.695312 -304255.34375 -3657684.75 ... -3523649.00 359175.28125 0.0 0.0 296.471497 42.151382 1.0 1.0
100.0 100.0 2155.678467 19610.035156 4705.484863 1252.351196 33563.519531 -306451.09375 -3655839.50 ... -3521580.75 356740.15625 0.0 0.0 296.461914 43.022167 1.0 1.0

1212 rows × 87 columns

In [166]:
# Plot selected global observables over time: one panel per quantity,
# one curve per system size (nmolecules).
y_quantities = [
    'Temperature',
    'Pressure',
    'Volume',
    'Potential',
    'Bond',
    'Coulomb (SR)',
    'Coul. recip.',
    ]

n = len(y_quantities)
cols = 2
# ceiling division; round(n/cols) silently dropped panels for some odd n
rows = -(-n // cols)
if rows > 1:
    positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]

fig, ax = plt.subplots(rows, cols, figsize=(5*cols, 4*rows))
for key, grp in res_df.groupby(['nmolecules']):
    for y_quantity, position in zip(y_quantities, positions):
        grp.plot('Time', y_quantity, ax=ax[position], label=key, title=y_quantity)

fig.tight_layout()

Visualize trajectory

In [167]:
# trajectory renderings (.mp4) of the NPT equilibration step
query = {'metadata.project': project_id}
query['metadata.step'] = 'GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad'
query['metadata.type'] = 'mp4_file'
In [168]:
# Use the underlying MongoDB collection directly to count all documents
# matching the query (one video file expected per parameter set).
fp.filepad.count_documents(query)
Out[168]:
13
In [169]:
# maps dataframe column name -> metadata field that distinguishes parameter sets
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}

# newest mp4 file per parameter set
aggregation_pipeline = [
    {"$match": query},
    {"$sort": {"metadata.datetime": pymongo.DESCENDING}},
    {
        "$group": {
            "_id": {field: '${}'.format(key) for field, key in parameter_dict.items()},
            "degeneracy": {"$sum": 1},  # number of matching data sets
            "latest": {"$first": "$gfs_id"},  # unique gridfs id of file
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# map parameter tuple -> Video widget
obj_dict = {}
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    # delete=False: the file must outlive this context manager so the Video
    # widget can still stream it when rendered later in the notebook
    with tempfile.NamedTemporaryFile(suffix='.mp4',delete=False) as tmp:
        tmp.write(content)
        tmp.flush()  # bugfix: flush buffered bytes before reading the file by name
        key = tuple(c["_id"].values())
        obj_dict.update({key: Video.from_file(tmp.name)})
    print('.',end='')
............
In [170]:
# show each trajectory video, labelled by its parameter tuple
for label, video in obj_dict.items():
    print(label)
    display(video)
(219,)
(438,)
(175,)
(394,)
(350,)
(263,)
(131,)
(88,)
(44,)
(306,)
(525,)
(481,)
In [ ]:
 

Pre-evaluated RDF

Overview

In [171]:
# all pre-evaluated RDF files (type ending in 'rdf') of the NPT equilibration step
query = {"metadata.project": project_id}
query["metadata.type"] = {'$regex': '.*rdf$'}
query["metadata.step"] = "GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad"

fp.filepad.count_documents(query)
Out[171]:
139
In [172]:
# Overview of RDF file types: group by 'metadata.type' only, counting
# objects and the covered date range per type.
rdf_type_group_stage = {
    "$group": {
        "_id": {
            'type': '$metadata.type',
        },
        "object_count": {"$sum": 1},  # count matching data sets
        "earliest": {'$min': '$metadata.datetime'},
        "latest": {'$max': '$metadata.datetime'},
    },
}
date_sort_stage = {  # sort by earliest date, descending
    "$sort": {"earliest": pymongo.DESCENDING}
}
aggregation_pipeline = [{"$match": query}, rdf_type_group_stage, date_sort_stage]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# flatten the grouped '_id' sub-document into top-level columns for display
res = [{**c['_id'], **c} for c in cursor]
res_df = pd.DataFrame(data=res, columns=['type', 'earliest', 'latest', 'object_count', '_id'])
del res_df["_id"]
In [173]:
res_df
Out[173]:
type earliest latest object_count
0 surfactant_tail_surfactant_tail_rdf 2020-07-19 22:53:00.105657 2020-07-19 22:53:01.323001 13
1 surfactant_head_surfactant_tail_rdf 2020-07-19 22:53:00.105655 2020-07-19 22:53:01.322999 14
2 surfactant_head_surfactant_head_rdf 2020-07-19 22:53:00.105653 2020-07-19 22:53:01.322997 14
3 substrate_surfactant_tail_rdf 2020-07-19 22:53:00.105650 2020-07-19 22:53:01.322995 14
4 substrate_surfactant_head_rdf 2020-07-19 22:53:00.105648 2020-07-19 22:53:01.322992 14
5 substrate_substrate_rdf 2020-07-19 22:53:00.105646 2020-07-19 22:53:01.322990 14
6 counterion_surfactant_tail_rdf 2020-07-19 22:53:00.105643 2020-07-19 22:53:01.322988 14
7 counterion_surfactant_head_rdf 2020-07-19 22:53:00.105641 2020-07-19 22:53:01.322986 14
8 counterion_substrate_rdf 2020-07-19 22:53:00.105638 2020-07-19 22:53:01.322983 14
9 counterion_counterion_rdf 2020-07-19 22:53:00.105636 2020-07-19 22:53:01.322981 14

Substrate - surfactant head RDF

In [174]:
# group data sets by the number of surfactant molecules in the system
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [175]:
# Select the substrate - surfactant head RDF data of the NPT equilibration step.
query = { 
    "metadata.project": project_id,
    "metadata.type": 'substrate_surfactant_head_rdf',
    "metadata.step": "GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad",
}

# count matching documents
fp.filepad.count_documents(query)
Out[175]:
14
In [176]:
# Fetch the latest substrate - surfactant head RDF file per system size.
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
# newest first, so the $first accumulator below picks the most recent file
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
# consistency fix: sibling RDF cells order groups by decreasing nmolecules so
# the plot panels appear deterministically; do the same here
second_sort_aggregation = {
    "$sort": { 
        "_id.nmolecules": pymongo.DESCENDING,
    }
}

aggregation_pipeline = [ 
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins (first row of the text file)
    rdf = data[1:] # one RDF per stored trajectory frame
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    print('.',end='')
print('')
............
In [177]:
# Plot first, intermediate and last frame RDF in one panel per system size.
n = len(res_dict)
cols = 2 if n > 1 else 1
# ceil division: round() uses banker's rounding and yields too few rows for
# e.g. n = 13, cols = 2 (round(6.5) == 6 -> only 12 panels for 13 data sets)
rows = (n + cols - 1) // cols
if rows > 1:
    positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]

fig, ax = plt.subplots(rows, cols, figsize=(5*cols, 4*rows))
if not isinstance(ax, Iterable):  # single Axes object when n == 1
    ax = [ax]
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    # bug fix: len(data) counted the dict's keys (always 2), not the number
    # of frames; use the middle row of the RDF array instead
    nframes = len(data['rdf'])
    ax[pos].plot(data['dist'], data['rdf'][0], label='First frame RDF')
    ax[pos].plot(data['dist'], data['rdf'][nframes//2], label='Intermediate frame RDF')
    ax[pos].plot(data['dist'], data['rdf'][-1], label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
fig.show()

Substrate - surfactant tail RDF

In [178]:
# group data sets by the number of surfactant molecules in the system
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [179]:
# Select the substrate - surfactant tail RDF data of the NPT equilibration step.
query = { 
    "metadata.project": project_id,
    "metadata.type": 'substrate_surfactant_tail_rdf',
    "metadata.step": "GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad",
}

# count matching documents
fp.filepad.count_documents(query)
Out[179]:
14
In [180]:
# Fetch the latest RDF data file per system size and parse it into res_dict.
res_dict = {}
failed_list = []

match_aggregation = {"$match": query}
# newest first, so the $first accumulator below picks the most recent file
sort_aggregation = {"$sort": {"metadata.datetime": pymongo.DESCENDING}}
group_aggregation = {
    "$group": {
        "_id": {field: f'${key}' for field, key in parameter_dict.items()},
        "degeneracy": {"$sum": 1},  # number matching data sets
        "latest": {"$first": "$gfs_id"},  # unique gridfs id of file
    }
}
# order result groups by decreasing surfactant count
second_sort_aggregation = {"$sort": {"_id.nmolecules": pymongo.DESCENDING}}

aggregation_pipeline = [
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation]
cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, doc in enumerate(cursor):
    content, metadata = fp.get_file_by_id(doc["latest"])
    nmolecules = int(doc["_id"]["nmolecules"])
    table = np.loadtxt(io.StringIO(content.decode()), comments='#')
    # first row: distance bins; remaining rows: one RDF per trajectory frame
    res_dict[nmolecules] = {'dist': table[0], 'rdf': table[1:]}
    print('.', end='')
print('')
............
In [181]:
# Plot first, intermediate and last frame RDF in one panel per system size.
n = len(res_dict)
cols = 2 if n > 1 else 1
# ceil division: round() uses banker's rounding and yields too few rows for
# e.g. n = 13, cols = 2 (round(6.5) == 6 -> only 12 panels for 13 data sets)
rows = (n + cols - 1) // cols
if rows > 1:
    positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]

fig, ax = plt.subplots(rows, cols, figsize=(5*cols, 4*rows))
if not isinstance(ax, Iterable):  # single Axes object when n == 1
    ax = [ax]
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    # bug fix: len(data) counted the dict's keys (always 2), not the number
    # of frames; use the middle row of the RDF array instead
    nframes = len(data['rdf'])
    ax[pos].plot(data['dist'], data['rdf'][0], label='First frame RDF')
    ax[pos].plot(data['dist'], data['rdf'][nframes//2], label='Intermediate frame RDF')
    ax[pos].plot(data['dist'], data['rdf'][-1], label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
fig.show()

Surfactant head - surfactant tail RDF

In [182]:
# group data sets by the number of surfactant molecules in the system
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [183]:
# Select the surfactant head - surfactant tail RDF data of the NPT equilibration step.
query = { 
    "metadata.project": project_id,
    "metadata.type": 'surfactant_head_surfactant_tail_rdf',
    "metadata.step": "GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad",
}

# count matching documents
fp.filepad.count_documents(query)
Out[183]:
14
In [184]:
# Fetch the latest RDF data file per system size and parse it into res_dict.
res_dict = {}
failed_list = []

match_aggregation = {"$match": query}
# newest first, so the $first accumulator below picks the most recent file
sort_aggregation = {"$sort": {"metadata.datetime": pymongo.DESCENDING}}
group_aggregation = {
    "$group": {
        "_id": {field: f'${key}' for field, key in parameter_dict.items()},
        "degeneracy": {"$sum": 1},  # number matching data sets
        "latest": {"$first": "$gfs_id"},  # unique gridfs id of file
    }
}
# order result groups by decreasing surfactant count
second_sort_aggregation = {"$sort": {"_id.nmolecules": pymongo.DESCENDING}}

aggregation_pipeline = [
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation]
cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, doc in enumerate(cursor):
    content, metadata = fp.get_file_by_id(doc["latest"])
    nmolecules = int(doc["_id"]["nmolecules"])
    table = np.loadtxt(io.StringIO(content.decode()), comments='#')
    # first row: distance bins; remaining rows: one RDF per trajectory frame
    res_dict[nmolecules] = {'dist': table[0], 'rdf': table[1:]}
    print('.', end='')
print('')
............
In [185]:
# Plot first, intermediate and last frame RDF in one panel per system size.
n = len(res_dict)
cols = 2 if n > 1 else 1
# ceil division: round() uses banker's rounding and yields too few rows for
# e.g. n = 13, cols = 2 (round(6.5) == 6 -> only 12 panels for 13 data sets)
rows = (n + cols - 1) // cols
if rows > 1:
    positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]

fig, ax = plt.subplots(rows, cols, figsize=(5*cols, 4*rows))
if not isinstance(ax, Iterable):  # single Axes object when n == 1
    ax = [ax]
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    # bug fix: len(data) counted the dict's keys (always 2), not the number
    # of frames; use the middle row of the RDF array instead
    nframes = len(data['rdf'])
    ax[pos].plot(data['dist'], data['rdf'][0], label='First frame RDF')
    ax[pos].plot(data['dist'], data['rdf'][nframes//2], label='Intermediate frame RDF')
    ax[pos].plot(data['dist'], data['rdf'][-1], label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
fig.show()

Relaxation analysis

Datasets in step

In [186]:
# queries to the data base are simple dictionaries
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad'
}
In [187]:
# check files degenerate by 'metadata.type' ad 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'dataset': '$metadata.step_specific.dtool_push.remote_dataset',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id']['dataset'], **c} for c in cursor]
columns = ['uuid', 'name', 'uri', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [188]:
res_df
Out[188]:
uuid name uri earliest latest object_count
0 46025af3-aa13-4d28-a3a0-16cdfa515080 2020-07-19-22-53-01-328749-n-525-m-525-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-01-328749-n-525-m-525-gromacsrelaxation 2020-07-19 22:53:01.334529 2020-07-19 22:53:01.334582 21
1 5269e851-3df9-4a76-8e3d-9f0981224fae 2020-07-19-22-53-01-222648-n-481-m-481-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-01-222648-n-481-m-481-gromacsrelaxation 2020-07-19 22:53:01.228234 2020-07-19 22:53:01.228288 21
2 8d1091f3-1b8b-4e88-9d95-137acb0d23c9 2020-07-19-22-53-01-119574-n-438-m-438-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-01-119574-n-438-m-438-gromacsrelaxation 2020-07-19 22:53:01.125808 2020-07-19 22:53:01.125857 21
3 a133bff6-9294-4727-8d6c-9639817e2177 2020-07-19-22-53-01-003994-n-394-m-394-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-01-003994-n-394-m-394-gromacsrelaxation 2020-07-19 22:53:01.010132 2020-07-19 22:53:01.010185 21
4 8cc16694-ccc6-4f37-93dc-2f77af5a4914 2020-07-19-22-53-00-894817-n-350-m-350-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-894817-n-350-m-350-gromacsrelaxation 2020-07-19 22:53:00.900515 2020-07-19 22:53:00.900566 21
5 7e794aa6-e087-46c8-976b-50328119ec30 2020-07-19-22-53-00-784285-n-306-m-306-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-784285-n-306-m-306-gromacsrelaxation 2020-07-19 22:53:00.790355 2020-07-19 22:53:00.790410 21
6 e1886099-69b1-4acd-b377-9e022fe2a6ec 2020-07-19-22-53-00-671734-n-263-m-263-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-671734-n-263-m-263-gromacsrelaxation 2020-07-19 22:53:00.677699 2020-07-19 22:53:00.677752 21
7 4318c0c1-15da-427a-bd36-8cc09abd5605 2020-07-19-22-53-00-561385-n-219-m-219-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-561385-n-219-m-219-gromacsrelaxation 2020-07-19 22:53:00.567673 2020-07-19 22:53:00.567724 21
8 f6e3e648-1409-4eba-b1af-a982146e4f58 2020-07-19-22-53-00-446008-n-175-m-175-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-446008-n-175-m-175-gromacsrelaxation 2020-07-19 22:53:00.451482 2020-07-19 22:53:00.451532 21
9 9a060f1a-2933-4289-a6b8-073b4f80bbdf 2020-07-19-22-53-00-333996-n-131-m-131-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-333996-n-131-m-131-gromacsrelaxation 2020-07-19 22:53:00.342447 2020-07-19 22:53:00.342501 21
10 5f508fe7-8b94-4863-995c-2e4ecf7c6f35 2020-07-19-22-53-00-219220-n-88-m-88-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-219220-n-88-m-88-gromacsrelaxation 2020-07-19 22:53:00.225186 2020-07-19 22:53:00.225260 21
11 0816f2f6-5e25-4a53-b65e-58282dd3c828 2020-07-19-22-53-00-111278-n-44-m-44-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-19-22-53-00-111278-n-44-m-44-gromacsrelaxation 2020-07-19 22:53:00.116576 2020-07-19 22:53:00.116638 21

Overview on objects in step

In [189]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[189]:
252
In [190]:
# check files degenerate by 'metadata.type' and 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique file type and name
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
                'name': '$metadata.name',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# flatten each group: merge the '_id' subdocument into the top-level dict
res = [ {**c['_id'], **c} for c in cursor]
columns = ['type', 'name', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"] # raw group key no longer needed after flattening
In [191]:
res_df
Out[191]:
type name earliest latest object_count
0 mp4_file NaN 2020-07-19 22:53:00.116638 2020-07-19 22:53:01.334582 12
1 surfactant_tail_rmsd NaN 2020-07-19 22:53:00.116633 2020-07-19 22:53:01.334580 12
2 surfactant_head_rmsd NaN 2020-07-19 22:53:00.116631 2020-07-19 22:53:01.334577 12
3 substrate_rmsd NaN 2020-07-19 22:53:00.116628 2020-07-19 22:53:01.334575 12
4 counterion_rmsd NaN 2020-07-19 22:53:00.116626 2020-07-19 22:53:01.334569 12
5 surfactant_tail_surfactant_tail_rdf NaN 2020-07-19 22:53:00.116623 2020-07-19 22:53:01.334567 12
6 surfactant_head_surfactant_tail_rdf NaN 2020-07-19 22:53:00.116621 2020-07-19 22:53:01.334565 12
7 surfactant_head_surfactant_head_rdf NaN 2020-07-19 22:53:00.116619 2020-07-19 22:53:01.334562 12
8 substrate_surfactant_tail_rdf NaN 2020-07-19 22:53:00.116616 2020-07-19 22:53:01.334560 12
9 substrate_surfactant_head_rdf NaN 2020-07-19 22:53:00.116614 2020-07-19 22:53:01.334558 12
10 substrate_substrate_rdf NaN 2020-07-19 22:53:00.116612 2020-07-19 22:53:01.334556 12
11 counterion_surfactant_tail_rdf NaN 2020-07-19 22:53:00.116609 2020-07-19 22:53:01.334554 12
12 counterion_surfactant_head_rdf NaN 2020-07-19 22:53:00.116607 2020-07-19 22:53:01.334551 12
13 counterion_substrate_rdf NaN 2020-07-19 22:53:00.116604 2020-07-19 22:53:01.334549 12
14 counterion_counterion_rdf NaN 2020-07-19 22:53:00.116602 2020-07-19 22:53:01.334547 12
15 index_file NaN 2020-07-19 22:53:00.116600 2020-07-19 22:53:01.334544 12
16 topology_file NaN 2020-07-19 22:53:00.116597 2020-07-19 22:53:01.334542 12
17 data_file NaN 2020-07-19 22:53:00.116587 2020-07-19 22:53:01.334539 12
18 trajectory_file NaN 2020-07-19 22:53:00.116585 2020-07-19 22:53:01.334537 12
19 energy_file NaN 2020-07-19 22:53:00.116582 2020-07-19 22:53:01.334534 12
20 log_file NaN 2020-07-19 22:53:00.116576 2020-07-19 22:53:01.334529 12

Global observables

In [192]:
# Select Gromacs .edr energy files of the relaxation step.
query = { 
    "metadata.project": project_id,
    'metadata.step': 'GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad',
    "metadata.type":    'energy_file',
}
# count matching documents
fp.filepad.count_documents(query)
Out[192]:
12
In [193]:
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [194]:
# Pipeline: filter by query, sort newest first, then keep only the most
# recent energy file per parameter set.
aggregation_pipeline = [
    {"$match": query},
    {"$sort": {"metadata.datetime": pymongo.DESCENDING}},
    {"$group": {
        "_id": {field: f'${key}' for field, key in parameter_dict.items()},
        "degeneracy": {"$sum": 1},  # number matching data sets
        "latest": {"$first": "$gfs_id"},  # unique gridfs id of file
    }},
]

cursor = fp.filepad.aggregate(aggregation_pipeline)
In [195]:
[ c for c in cursor]
Out[195]:
[{'_id': {'nmolecules': 481},
  'degeneracy': 1,
  'latest': '5f14d8507dc9cfbf449da85b'},
 {'_id': {'nmolecules': 438},
  'degeneracy': 1,
  'latest': '5f14d9777dc9cfbf449dc151'},
 {'_id': {'nmolecules': 394},
  'degeneracy': 1,
  'latest': '5f14e4507dc9cfbf449e14b6'},
 {'_id': {'nmolecules': 350},
  'degeneracy': 1,
  'latest': '5f14ec127dc9cfbf449e4ea8'},
 {'_id': {'nmolecules': 263},
  'degeneracy': 1,
  'latest': '5f14f9a57dc9cfbf449eaf44'},
 {'_id': {'nmolecules': 219},
  'degeneracy': 1,
  'latest': '5f1505b57dc9cfbf449f0f72'},
 {'_id': {'nmolecules': 175},
  'degeneracy': 1,
  'latest': '5f150c897dc9cfbf449f3ae8'},
 {'_id': {'nmolecules': 131},
  'degeneracy': 1,
  'latest': '5f150d917dc9cfbf449f53b3'},
 {'_id': {'nmolecules': 306},
  'degeneracy': 1,
  'latest': '5f14f2387dc9cfbf449e7c9d'},
 {'_id': {'nmolecules': 44},
  'degeneracy': 1,
  'latest': '5f151dee7dc9cfbf449faa26'},
 {'_id': {'nmolecules': 525},
  'degeneracy': 1,
  'latest': '5f14d73e7dc9cfbf449d8f52'},
 {'_id': {'nmolecules': 88},
  'degeneracy': 1,
  'latest': '5f1519e07dc9cfbf449f91bb'}]
In [196]:
res_list = []

cursor = fp.filepad.aggregate(aggregation_pipeline)

# Read each .edr energy file into a DataFrame and prepend the group-key
# values (nmolecules) as an extra index level.
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    with tempfile.NamedTemporaryFile(suffix='.edr') as tmp:
        tmp.write(content)
        tmp.flush()  # bug fix: panedr reads by name; unflushed bytes would truncate the file
        res_df = panedr.edr_to_df(tmp.name)

        # resulting index levels: (nmolecules, step)
        mi = pd.MultiIndex.from_product(
            [c["_id"].values(), res_df.index],
            names=[*c["_id"].keys(), 'step'])
        res_mi_df = res_df.set_index(mi)
        res_list.append(res_mi_df)
    print('.', end='')
print('')
res_df_mi = pd.concat(res_list)
res_df = res_df_mi.reset_index()  # flat frame for the groupby/plot cells below
............
In [197]:
res_df.columns
Out[197]:
Index(['nmolecules', 'step', 'Time', 'Bond', 'U-B', 'Proper Dih.', 'LJ-14',
       'Coulomb-14', 'LJ (SR)', 'Coulomb (SR)', 'Coul. recip.', 'Potential',
       'Kinetic En.', 'Total Energy', 'Conserved En.', 'Temperature',
       'Pressure', 'Constr. rmsd', 'Box-X', 'Box-Y', 'Box-Z', 'Volume',
       'Density', 'pV', 'Enthalpy', 'Vir-XX', 'Vir-XY', 'Vir-XZ', 'Vir-YX',
       'Vir-YY', 'Vir-YZ', 'Vir-ZX', 'Vir-ZY', 'Vir-ZZ', 'Pres-XX', 'Pres-XY',
       'Pres-XZ', 'Pres-YX', 'Pres-YY', 'Pres-YZ', 'Pres-ZX', 'Pres-ZY',
       'Pres-ZZ', '#Surf*SurfTen', 'Box-Vel-XX', 'Box-Vel-YY', 'Box-Vel-ZZ',
       'Coul-SR:Surfactant-Surfactant', 'LJ-SR:Surfactant-Surfactant',
       'Coul-14:Surfactant-Surfactant', 'LJ-14:Surfactant-Surfactant',
       'Coul-SR:Surfactant-Substrate', 'LJ-SR:Surfactant-Substrate',
       'Coul-14:Surfactant-Substrate', 'LJ-14:Surfactant-Substrate',
       'Coul-SR:Surfactant-Ion', 'LJ-SR:Surfactant-Ion',
       'Coul-14:Surfactant-Ion', 'LJ-14:Surfactant-Ion',
       'Coul-SR:Surfactant-rest', 'LJ-SR:Surfactant-rest',
       'Coul-14:Surfactant-rest', 'LJ-14:Surfactant-rest',
       'Coul-SR:Substrate-Substrate', 'LJ-SR:Substrate-Substrate',
       'Coul-14:Substrate-Substrate', 'LJ-14:Substrate-Substrate',
       'Coul-SR:Substrate-Ion', 'LJ-SR:Substrate-Ion', 'Coul-14:Substrate-Ion',
       'LJ-14:Substrate-Ion', 'Coul-SR:Substrate-rest', 'LJ-SR:Substrate-rest',
       'Coul-14:Substrate-rest', 'LJ-14:Substrate-rest', 'Coul-SR:Ion-Ion',
       'LJ-SR:Ion-Ion', 'Coul-14:Ion-Ion', 'LJ-14:Ion-Ion', 'Coul-SR:Ion-rest',
       'LJ-SR:Ion-rest', 'Coul-14:Ion-rest', 'LJ-14:Ion-rest',
       'Coul-SR:rest-rest', 'LJ-SR:rest-rest', 'Coul-14:rest-rest',
       'LJ-14:rest-rest', 'T-non-Substrate', 'T-Substrate',
       'Lamb-non-Substrate', 'Lamb-Substrate'],
      dtype='object')
In [198]:
res_df_mi
Out[198]:
Time Bond U-B Proper Dih. LJ-14 Coulomb-14 LJ (SR) Coulomb (SR) ... Coul-SR:rest-rest LJ-SR:rest-rest Coul-14:rest-rest LJ-14:rest-rest T-non-Substrate T-Substrate Lamb-non-Substrate Lamb-Substrate
nmolecules step
481 0.0 0.0 8303.608398 73871.851562 17496.201172 4498.496094 122485.039062 -347417.93750 -3722675.25 ... -3277627.50 331966.68750 0.0 0.0 299.042694 29.401619 1.0 1.0
1.0 1.0 8214.778320 74148.695312 18265.007812 4557.044922 122138.265625 -350537.28125 -3715491.00 ... -3266354.75 329117.40625 0.0 0.0 297.837402 29.961632 1.0 1.0
2.0 2.0 8345.959961 74711.750000 18231.474609 4924.902344 122209.250000 -347369.28125 -3723363.50 ... -3275268.00 332642.71875 0.0 0.0 298.143494 31.818558 1.0 1.0
3.0 3.0 8151.788086 74331.992188 18476.951172 4804.255859 122236.687500 -351120.81250 -3721030.75 ... -3271685.75 328977.65625 0.0 0.0 298.557281 32.175842 1.0 1.0
4.0 4.0 8236.656250 73741.585938 18342.388672 4785.673340 122148.570312 -350222.68750 -3724876.50 ... -3275797.25 330658.25000 0.0 0.0 298.325378 33.618259 1.0 1.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
88 196.0 196.0 1474.990845 13177.531250 3194.437744 835.010376 21801.701172 -307357.09375 -3602397.25 ... -3498201.25 352909.28125 0.0 0.0 297.630859 43.448444 1.0 1.0
197.0 197.0 1439.236206 13241.346680 3286.716064 815.688049 21870.714844 -306590.15625 -3602733.75 ... -3499361.75 354043.31250 0.0 0.0 296.921265 43.736523 1.0 1.0
198.0 198.0 1448.349731 13523.822266 3416.596191 866.902954 21847.775391 -303662.46875 -3605236.50 ... -3501112.50 356859.93750 0.0 0.0 296.517334 43.090897 1.0 1.0
199.0 199.0 1425.410889 13253.172852 3319.118164 898.297607 21799.173828 -305776.00000 -3605099.00 ... -3501087.50 354678.03125 0.0 0.0 297.987701 44.791225 1.0 1.0
200.0 200.0 1455.170410 13276.759766 3359.553711 884.132141 21775.613281 -306221.68750 -3604071.75 ... -3499964.75 354104.31250 0.0 0.0 297.068176 44.066669 1.0 1.0

2412 rows × 89 columns

In [199]:
# Plot selected global observables over time: one panel per quantity, one
# line per system size (nmolecules).
y_quantities = [
    'Temperature',
    'Pressure',
    'Volume',
    'Potential',
    'Bond',
    'Coulomb (SR)',
    'Coul. recip.',
    ]

n = len(y_quantities)
cols = 2
# ceil division; round() banker's-rounds and can drop panels for some odd n
rows = (n + cols - 1) // cols
if rows > 1:
    positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]

fig, ax = plt.subplots(rows, cols, figsize=(5*cols, 4*rows))
for key, grp in res_df.groupby(['nmolecules']):
    for y_quantity, position in zip(y_quantities, positions):
        grp.plot('Time', y_quantity, ax=ax[position], label=key, title=y_quantity)

fig.tight_layout()

Visualize trajectory

In [200]:
# Select rendered trajectory videos (mp4) of the relaxation step.
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad',
    'metadata.type': 'mp4_file',
}
In [201]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[201]:
12
In [202]:
# group videos by the number of surfactant molecules in the system
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}

# filter -> newest first -> keep the most recent mp4 per parameter set
aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# Build one embeddable Video widget per parameter set, keyed by the tuple of
# group-key values (here: (nmolecules,)).
obj_dict = {}
for i, c in enumerate(cursor):
    content, metadata = fp.get_file_by_id(c["latest"])
    # delete=False keeps the .mp4 on disk for later widget access;
    # NOTE(review): temp files are never cleaned up afterwards
    with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp:
        tmp.write(content)
        tmp.flush()  # bug fix: ensure all buffered bytes are on disk before reading by name
        key = tuple(c["_id"].values())
        obj_dict[key] = Video.from_file(tmp.name)
    print('.', end='')
............
In [203]:
# Show each rendered trajectory video, labeled with its parameter tuple.
for key in obj_dict:
    print(key)
    display(obj_dict[key])
(350,)
(525,)
(263,)
(481,)
(306,)
(219,)
(175,)
(131,)
(88,)
(44,)
(438,)
(394,)

Pre-evaluated RDF

Overview

In [204]:
# Select every pre-evaluated RDF file (metadata.type ends in 'rdf') pushed by
# the relaxation analysis step of this project.
query = { 
    "metadata.project": project_id,
    "metadata.type": {'$regex': '.*rdf$'},
    "metadata.step": "GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad",
}

# count matching documents via the underlying MongoDB collection
fp.filepad.count_documents(query)
Out[204]:
120
In [205]:
# check files degenerate by 'metadata.type' ad 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id'], **c} for c in cursor]
columns = ['type', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [206]:
res_df
Out[206]:
type earliest latest object_count
0 surfactant_tail_surfactant_tail_rdf 2020-07-19 22:53:00.116623 2020-07-19 22:53:01.334567 12
1 surfactant_head_surfactant_tail_rdf 2020-07-19 22:53:00.116621 2020-07-19 22:53:01.334565 12
2 surfactant_head_surfactant_head_rdf 2020-07-19 22:53:00.116619 2020-07-19 22:53:01.334562 12
3 substrate_surfactant_tail_rdf 2020-07-19 22:53:00.116616 2020-07-19 22:53:01.334560 12
4 substrate_surfactant_head_rdf 2020-07-19 22:53:00.116614 2020-07-19 22:53:01.334558 12
5 substrate_substrate_rdf 2020-07-19 22:53:00.116612 2020-07-19 22:53:01.334556 12
6 counterion_surfactant_tail_rdf 2020-07-19 22:53:00.116609 2020-07-19 22:53:01.334554 12
7 counterion_surfactant_head_rdf 2020-07-19 22:53:00.116607 2020-07-19 22:53:01.334551 12
8 counterion_substrate_rdf 2020-07-19 22:53:00.116604 2020-07-19 22:53:01.334549 12
9 counterion_counterion_rdf 2020-07-19 22:53:00.116602 2020-07-19 22:53:01.334547 12

Substrate - surfactant head RDF

In [207]:
# group data sets by the number of surfactant molecules in the system
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [208]:
# Select the substrate - surfactant head RDF data of the relaxation step.
query = { 
    "metadata.project": project_id,
    "metadata.type": 'substrate_surfactant_head_rdf',
    "metadata.step": "GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad",
}

# count matching documents
fp.filepad.count_documents(query)
Out[208]:
12
In [209]:
# Fetch the latest substrate - surfactant head RDF file per system size.
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
# newest first, so the $first accumulator below picks the most recent file
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
# consistency fix: sibling RDF cells order groups by decreasing nmolecules so
# the plot panels appear deterministically; do the same here
second_sort_aggregation = {
    "$sort": { 
        "_id.nmolecules": pymongo.DESCENDING,
    }
}

aggregation_pipeline = [ 
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins (first row of the text file)
    rdf = data[1:] # one RDF per stored trajectory frame
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    print('.',end='')
print('')
............
In [210]:
# Plot first, intermediate and last frame RDF in one panel per system size.
n = len(res_dict)
cols = 2 if n > 1 else 1
# ceil division: round() uses banker's rounding and yields too few rows for
# e.g. n = 13, cols = 2 (round(6.5) == 6 -> only 12 panels for 13 data sets)
rows = (n + cols - 1) // cols
if rows > 1:
    positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]

fig, ax = plt.subplots(rows, cols, figsize=(5*cols, 4*rows))
if not isinstance(ax, Iterable):  # single Axes object when n == 1
    ax = [ax]
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    # bug fix: len(data) counted the dict's keys (always 2), not the number
    # of frames; use the middle row of the RDF array instead
    nframes = len(data['rdf'])
    ax[pos].plot(data['dist'], data['rdf'][0], label='First frame RDF')
    ax[pos].plot(data['dist'], data['rdf'][nframes//2], label='Intermediate frame RDF')
    ax[pos].plot(data['dist'], data['rdf'][-1], label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
fig.show()

Substrate - surfactant tail RDF

In [211]:
# group data sets by the number of surfactant molecules in the system
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [212]:
# Select the substrate - surfactant tail RDF data of the relaxation step.
query = { 
    "metadata.project": project_id,
    "metadata.type": 'substrate_surfactant_tail_rdf',
    "metadata.step": "GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad",
}

# count matching documents
fp.filepad.count_documents(query)
Out[212]:
12
In [213]:
# Fetch the latest RDF data file per system size and parse it into res_dict.
res_dict = {}
failed_list = []

match_aggregation = {"$match": query}
# newest first, so the $first accumulator below picks the most recent file
sort_aggregation = {"$sort": {"metadata.datetime": pymongo.DESCENDING}}
group_aggregation = {
    "$group": {
        "_id": {field: f'${key}' for field, key in parameter_dict.items()},
        "degeneracy": {"$sum": 1},  # number matching data sets
        "latest": {"$first": "$gfs_id"},  # unique gridfs id of file
    }
}
# order result groups by decreasing surfactant count
second_sort_aggregation = {"$sort": {"_id.nmolecules": pymongo.DESCENDING}}

aggregation_pipeline = [
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation]
cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, doc in enumerate(cursor):
    content, metadata = fp.get_file_by_id(doc["latest"])
    nmolecules = int(doc["_id"]["nmolecules"])
    table = np.loadtxt(io.StringIO(content.decode()), comments='#')
    # first row: distance bins; remaining rows: one RDF per trajectory frame
    res_dict[nmolecules] = {'dist': table[0], 'rdf': table[1:]}
    print('.', end='')
print('')
............
In [214]:
# Plot first, intermediate and last frame RDF in one panel per system size.
n = len(res_dict)
cols = 2 if n > 1 else 1
# ceil division: round() uses banker's rounding and yields too few rows for
# e.g. n = 13, cols = 2 (round(6.5) == 6 -> only 12 panels for 13 data sets)
rows = (n + cols - 1) // cols
if rows > 1:
    positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]

fig, ax = plt.subplots(rows, cols, figsize=(5*cols, 4*rows))
if not isinstance(ax, Iterable):  # single Axes object when n == 1
    ax = [ax]
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    # bug fix: len(data) counted the dict's keys (always 2), not the number
    # of frames; use the middle row of the RDF array instead
    nframes = len(data['rdf'])
    ax[pos].plot(data['dist'], data['rdf'][0], label='First frame RDF')
    ax[pos].plot(data['dist'], data['rdf'][nframes//2], label='Intermediate frame RDF')
    ax[pos].plot(data['dist'], data['rdf'][-1], label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
fig.show()

Surfactant head - surfactant tail RDF

In [215]:
# group data sets by the number of surfactant molecules in the system
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [216]:
# Select the surfactant head - surfactant tail RDF data of the relaxation step.
query = { 
    "metadata.project": project_id,
    "metadata.type": 'surfactant_head_surfactant_tail_rdf',
    "metadata.step": "GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad",
}

# count matching documents
fp.filepad.count_documents(query)
Out[216]:
12
In [217]:
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
second_sort_aggregation = {
    "$sort": { 
        "_id.nmolecules": pymongo.DESCENDING,
    }
}


aggregation_pipeline = [ 
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

# res_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    # res_list.append(data)
    print('.',end='')
print('')
............
In [218]:
# Plot first, intermediate and last frame RDF in one panel per system size.
n = len(res_dict)
cols = 2 if n > 1 else 1
# ceil division: round() uses banker's rounding and yields too few rows for
# e.g. n = 13, cols = 2 (round(6.5) == 6 -> only 12 panels for 13 data sets)
rows = (n + cols - 1) // cols
if rows > 1:
    positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]

fig, ax = plt.subplots(rows, cols, figsize=(5*cols, 4*rows))
if not isinstance(ax, Iterable):  # single Axes object when n == 1
    ax = [ax]
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    # bug fix: len(data) counted the dict's keys (always 2), not the number
    # of frames; use the middle row of the RDF array instead
    nframes = len(data['rdf'])
    ax[pos].plot(data['dist'], data['rdf'][0], label='First frame RDF')
    ax[pos].plot(data['dist'], data['rdf'][nframes//2], label='Intermediate frame RDF')
    ax[pos].plot(data['dist'], data['rdf'][-1], label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
fig.show()

Surfactant head - counterion RDF

In [219]:
# Map grouping field name -> metadata document key used by the aggregation below.
parameter_dict = dict(nmolecules='metadata.system.surfactant.nmolecules')
In [220]:
# Select this project's counterion - surfactant head RDF data files.
query = {
    "metadata.project": project_id,
    "metadata.type": "counterion_surfactant_head_rdf",
    "metadata.step": "GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad",
}

# Count matching documents (one per stored data file).
fp.filepad.count_documents(query)
Out[220]:
12
In [221]:
# Collect one data set per nmolecules value into res_dict.
res_dict = {}     # nmolecules -> {'dist': distance bins, 'rdf': per-frame RDF rows}
failed_list = []  # kept for interface parity with other analysis cells; not filled here

# Pipeline: match the query, sort newest first, then keep only the most
# recent GridFS file id ("$first" after the sort) per distinct parameter set.
match_aggregation = {
    "$match": query
}
sort_aggregation = {
    "$sort": {
        "metadata.datetime": pymongo.DESCENDING,
    }
}
group_aggregation = {
    "$group": {
        "_id": {field: '${}'.format(key) for field, key in parameter_dict.items()},
        "degeneracy": {"$sum": 1},        # number of matching data sets
        "latest": {"$first": "$gfs_id"},  # unique gridfs id of the newest file
    }
}
second_sort_aggregation = {
    "$sort": {
        "_id.nmolecules": pymongo.DESCENDING,
    }
}

aggregation_pipeline = [
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation]
cursor = fp.filepad.aggregate(aggregation_pipeline)

for c in cursor:  # enumerate index was unused; iterate documents directly
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    # File layout: first row = distance bins, remaining rows = one RDF per frame.
    data = np.loadtxt(io.StringIO(content.decode()), comments='#')
    d = data[0]
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    print('.', end='')  # progress indicator, one dot per data set
print('')
............
In [222]:
# Panel grid: one subplot per nmolecules value, two columns when possible.
n = len(res_dict)
cols = 2 if n > 1 else 1
# Bug fix: round(n/cols) uses banker's rounding (round(2.5) == 2), which for
# odd n can allocate too few panels and silently drop data sets in zip() below.
# Use ceiling division instead.
rows = (n + cols - 1) // cols
if rows > 1:
    positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = list(range(cols))[:n]

fig, ax = plt.subplots(rows, cols, figsize=(5 * cols, 4 * rows))
if not isinstance(ax, Iterable):
    ax = [ax]  # single panel: wrap so ax[pos] indexing works uniformly
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    frames = data['rdf']  # one RDF row per trajectory frame
    # Bug fix: len(data) is the dict length (always 2), so the "intermediate"
    # curve used to plot the second frame; use the frame count instead.
    ax[pos].plot(data['dist'], frames[0], label='First frame RDF')
    ax[pos].plot(data['dist'], frames[len(frames) // 2], label='Intermediate frame RDF')
    ax[pos].plot(data['dist'], frames[-1], label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()

Surfactant head - surfactant head RDF

In [223]:
# Map grouping field name -> metadata document key used by the aggregation below.
parameter_dict = dict(nmolecules='metadata.system.surfactant.nmolecules')
In [224]:
# Select this project's surfactant head - surfactant head RDF data files.
query = {
    "metadata.project": project_id,
    "metadata.type": "surfactant_head_surfactant_head_rdf",
    "metadata.step": "GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad",
}

# Count matching documents (one per stored data file).
fp.filepad.count_documents(query)
Out[224]:
12
In [225]:
# Collect one data set per nmolecules value into res_dict.
res_dict = {}     # nmolecules -> {'dist': distance bins, 'rdf': per-frame RDF rows}
failed_list = []  # kept for interface parity with other analysis cells; not filled here

# Pipeline: match the query, sort newest first, then keep only the most
# recent GridFS file id ("$first" after the sort) per distinct parameter set.
match_aggregation = {
    "$match": query
}
sort_aggregation = {
    "$sort": {
        "metadata.datetime": pymongo.DESCENDING,
    }
}
group_aggregation = {
    "$group": {
        "_id": {field: '${}'.format(key) for field, key in parameter_dict.items()},
        "degeneracy": {"$sum": 1},        # number of matching data sets
        "latest": {"$first": "$gfs_id"},  # unique gridfs id of the newest file
    }
}
second_sort_aggregation = {
    "$sort": {
        "_id.nmolecules": pymongo.DESCENDING,
    }
}

aggregation_pipeline = [
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation]
cursor = fp.filepad.aggregate(aggregation_pipeline)

for c in cursor:  # enumerate index was unused; iterate documents directly
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    # File layout: first row = distance bins, remaining rows = one RDF per frame.
    data = np.loadtxt(io.StringIO(content.decode()), comments='#')
    d = data[0]
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    print('.', end='')  # progress indicator, one dot per data set
print('')
............
In [226]:
# Panel grid: one subplot per nmolecules value, two columns when possible.
n = len(res_dict)
cols = 2 if n > 1 else 1
# Bug fix: round(n/cols) uses banker's rounding (round(2.5) == 2), which for
# odd n can allocate too few panels and silently drop data sets in zip() below.
# Use ceiling division instead.
rows = (n + cols - 1) // cols
cutoff_index_inner = 1   # skip the innermost distance bin in the plots
cutoff_index_outer = -1  # NOTE(review): defined but unused in this cell
if rows > 1:
    positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = list(range(cols))[:n]

fig, ax = plt.subplots(rows, cols, figsize=(5 * cols, 4 * rows))
if not isinstance(ax, Iterable):
    ax = [ax]  # single panel: wrap so ax[pos] indexing works uniformly
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    dist = data['dist'][cutoff_index_inner:]
    frames = data['rdf']  # one RDF row per trajectory frame
    # Bug fix: len(data) is the dict length (always 2), so the "intermediate"
    # curve used to plot the second frame; use the frame count instead.
    ax[pos].plot(dist, frames[0][cutoff_index_inner:], label='First frame RDF')
    ax[pos].plot(dist, frames[len(frames) // 2][cutoff_index_inner:], label='Intermediate frame RDF')
    ax[pos].plot(dist, frames[-1][cutoff_index_inner:], label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()

Surfactant tail - surfactant tail RDF

In [227]:
# Map grouping field name -> metadata document key used by the aggregation below.
parameter_dict = dict(nmolecules='metadata.system.surfactant.nmolecules')
In [228]:
# Select this project's surfactant tail - surfactant tail RDF data files.
query = {
    "metadata.project": project_id,
    "metadata.type": "surfactant_tail_surfactant_tail_rdf",
    "metadata.step": "GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad",
}

# Count matching documents (one per stored data file).
fp.filepad.count_documents(query)
Out[228]:
12
In [229]:
# Collect one data set per nmolecules value into res_dict.
res_dict = {}     # nmolecules -> {'dist': distance bins, 'rdf': per-frame RDF rows}
failed_list = []  # kept for interface parity with other analysis cells; not filled here

# Pipeline: match the query, sort newest first, then keep only the most
# recent GridFS file id ("$first" after the sort) per distinct parameter set.
match_aggregation = {
    "$match": query
}
sort_aggregation = {
    "$sort": {
        "metadata.datetime": pymongo.DESCENDING,
    }
}
group_aggregation = {
    "$group": {
        "_id": {field: '${}'.format(key) for field, key in parameter_dict.items()},
        "degeneracy": {"$sum": 1},        # number of matching data sets
        "latest": {"$first": "$gfs_id"},  # unique gridfs id of the newest file
    }
}
second_sort_aggregation = {
    "$sort": {
        "_id.nmolecules": pymongo.DESCENDING,
    }
}

aggregation_pipeline = [
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation]
cursor = fp.filepad.aggregate(aggregation_pipeline)

for c in cursor:  # enumerate index was unused; iterate documents directly
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    # File layout: first row = distance bins, remaining rows = one RDF per frame.
    data = np.loadtxt(io.StringIO(content.decode()), comments='#')
    d = data[0]
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    print('.', end='')  # progress indicator, one dot per data set
print('')
............
In [230]:
# Panel grid: one subplot per nmolecules value, two columns when possible.
n = len(res_dict)
cols = 2 if n > 1 else 1
# Bug fix: round(n/cols) uses banker's rounding (round(2.5) == 2), which for
# odd n can allocate too few panels and silently drop data sets in zip() below.
# Use ceiling division instead.
rows = (n + cols - 1) // cols
cutoff_index_inner = 1   # skip the innermost distance bin in the plots
cutoff_index_outer = -1  # also drop the outermost bin
if rows > 1:
    positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = list(range(cols))[:n]

fig, ax = plt.subplots(rows, cols, figsize=(5 * cols, 4 * rows))
if not isinstance(ax, Iterable):
    ax = [ax]  # single panel: wrap so ax[pos] indexing works uniformly
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    sl = slice(cutoff_index_inner, cutoff_index_outer)
    dist = data['dist'][sl]
    frames = data['rdf']  # one RDF row per trajectory frame
    # Bug fix: len(data) is the dict length (always 2), so the "intermediate"
    # curve used to plot the second frame; use the frame count instead.
    ax[pos].plot(dist, frames[0][sl], label='First frame RDF')
    ax[pos].plot(dist, frames[len(frames) // 2][sl], label='Intermediate frame RDF')
    ax[pos].plot(dist, frames[-1][sl], label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()

Substrate - substrate RDF

In [231]:
# Map grouping field name -> metadata document key used by the aggregation below.
parameter_dict = dict(nmolecules='metadata.system.surfactant.nmolecules')
In [232]:
# Select this project's substrate - substrate RDF data files.
query = {
    "metadata.project": project_id,
    "metadata.type": "substrate_substrate_rdf",
    "metadata.step": "GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad",
}

# Count matching documents (one per stored data file).
fp.filepad.count_documents(query)
Out[232]:
12
In [233]:
# Collect one data set per nmolecules value into res_dict.
res_dict = {}     # nmolecules -> {'dist': distance bins, 'rdf': per-frame RDF rows}
failed_list = []  # kept for interface parity with other analysis cells; not filled here

# Pipeline: match the query, sort newest first, then keep only the most
# recent GridFS file id ("$first" after the sort) per distinct parameter set.
match_aggregation = {
    "$match": query
}
sort_aggregation = {
    "$sort": {
        "metadata.datetime": pymongo.DESCENDING,
    }
}
group_aggregation = {
    "$group": {
        "_id": {field: '${}'.format(key) for field, key in parameter_dict.items()},
        "degeneracy": {"$sum": 1},        # number of matching data sets
        "latest": {"$first": "$gfs_id"},  # unique gridfs id of the newest file
    }
}
second_sort_aggregation = {
    "$sort": {
        "_id.nmolecules": pymongo.DESCENDING,
    }
}

aggregation_pipeline = [
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation]
cursor = fp.filepad.aggregate(aggregation_pipeline)

for c in cursor:  # enumerate index was unused; iterate documents directly
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    # File layout: first row = distance bins, remaining rows = one RDF per frame.
    data = np.loadtxt(io.StringIO(content.decode()), comments='#')
    d = data[0]
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    print('.', end='')  # progress indicator, one dot per data set
print('')
............
In [234]:
# Panel grid: one subplot per nmolecules value, two columns when possible.
n = len(res_dict)
cols = 2 if n > 1 else 1
# Bug fix: round(n/cols) uses banker's rounding (round(2.5) == 2), which for
# odd n can allocate too few panels and silently drop data sets in zip() below.
# Use ceiling division instead.
rows = (n + cols - 1) // cols
cutoff_index_inner = 1   # skip the innermost distance bin in the plots
cutoff_index_outer = -1  # also drop the outermost bin
if rows > 1:
    positions = [(i, j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = list(range(cols))[:n]

fig, ax = plt.subplots(rows, cols, figsize=(5 * cols, 4 * rows))
if not isinstance(ax, Iterable):
    ax = [ax]  # single panel: wrap so ax[pos] indexing works uniformly
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    sl = slice(cutoff_index_inner, cutoff_index_outer)
    dist = data['dist'][sl]
    frames = data['rdf']  # one RDF row per trajectory frame
    # Bug fix: len(data) is the dict length (always 2), so the "intermediate"
    # curve used to plot the second frame; use the frame count instead.
    ax[pos].plot(dist, frames[0][sl], label='First frame RDF')
    ax[pos].plot(dist, frames[len(frames) // 2][sl], label='Intermediate frame RDF')
    ax[pos].plot(dist, frames[-1][sl], label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()
In [ ]: